Total coverage: 226557 (13%) of 1881756
// SPDX-License-Identifier: GPL-2.0+
/*
 * 2002-10-15  Posix Clocks & timers
 *                           by George Anzinger george@mvista.com
 *                           Copyright (C) 2002 2003 by MontaVista Software.
 *
 * 2004-06-01  Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
 *                           Copyright (C) 2004 Boris Hu
 *
 * These are all the functions necessary to implement POSIX clocks & timers
 */
#include <linux/compat.h>
#include <linux/compiler.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/memblock.h>
#include <linux/nospec.h>
#include <linux/posix-clock.h>
#include <linux/posix-timers.h>
#include <linux/prctl.h>
#include <linux/sched/task.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/time.h>
#include <linux/time_namespace.h>
#include <linux/uaccess.h>

#include "timekeeping.h"
#include "posix-timers.h"

/*
 * Timers are managed in a hash table for lockless lookup. The hash key is
 * constructed from current::signal and the timer ID and the timer is
 * matched against current::signal and the timer ID when walking the hash
 * bucket list.
 *
 * This allows checkpoint/restore to reconstruct the exact timer IDs for
 * a process.
 */
struct timer_hash_bucket {
	spinlock_t		lock;
	struct hlist_head	head;
};

static struct {
	struct timer_hash_bucket	*buckets;
	unsigned long			mask;
	struct kmem_cache		*cache;
} __timer_data __ro_after_init __aligned(4*sizeof(long));

#define timer_buckets		(__timer_data.buckets)
#define timer_hashmask		(__timer_data.mask)
#define posix_timers_cache	(__timer_data.cache)

static const struct k_clock * const posix_clocks[];

static const struct k_clock *clockid_to_kclock(const clockid_t id);
static const struct k_clock clock_realtime, clock_monotonic;

#define TIMER_ANY_ID		INT_MIN

/* SIGEV_THREAD_ID cannot share a bit with the other SIGEV values. */
#if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \
			~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD))
#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
#endif

static struct k_itimer *__lock_timer(timer_t timer_id);

#define lock_timer(tid)							\
({	struct k_itimer *__timr;					\
	__cond_lock(&__timr->it_lock, __timr = __lock_timer(tid));	\
	__timr;								\
})

static inline void unlock_timer(struct k_itimer *timr)
{
	if (likely((timr)))
		spin_unlock_irq(&timr->it_lock);
}

#define scoped_timer_get_or_fail(_id)	\
	scoped_cond_guard(lock_timer, return -EINVAL, _id)

#define scoped_timer				(scope)

DEFINE_CLASS(lock_timer, struct k_itimer *, unlock_timer(_T), __lock_timer(id), timer_t id);
DEFINE_CLASS_IS_COND_GUARD(lock_timer);

static struct timer_hash_bucket *hash_bucket(struct signal_struct *sig, unsigned int nr)
{
	return &timer_buckets[jhash2((u32 *)&sig, sizeof(sig) / sizeof(u32), nr) & timer_hashmask];
}

static struct k_itimer *posix_timer_by_id(timer_t id)
{
	struct signal_struct *sig = current->signal;
	struct timer_hash_bucket *bucket = hash_bucket(sig, id);
	struct k_itimer *timer;

	hlist_for_each_entry_rcu(timer, &bucket->head, t_hash) {
		/* timer->it_signal can be set concurrently */
		if ((READ_ONCE(timer->it_signal) == sig) && (timer->it_id == id))
			return timer;
	}
	return NULL;
}

static inline struct signal_struct *posix_sig_owner(const struct k_itimer *timer)
{
	unsigned long val = (unsigned long)timer->it_signal;

	/*
	 * Mask out bit 0, which acts as invalid marker to prevent
	 * posix_timer_by_id() detecting it as valid.
*/ return (struct signal_struct *)(val & ~1UL); } static bool posix_timer_hashed(struct timer_hash_bucket *bucket, struct signal_struct *sig, timer_t id) { struct hlist_head *head = &bucket->head; struct k_itimer *timer; hlist_for_each_entry_rcu(timer, head, t_hash, lockdep_is_held(&bucket->lock)) { if ((posix_sig_owner(timer) == sig) && (timer->it_id == id)) return true; } return false; } static bool posix_timer_add_at(struct k_itimer *timer, struct signal_struct *sig, unsigned int id) { struct timer_hash_bucket *bucket = hash_bucket(sig, id); scoped_guard (spinlock, &bucket->lock) { /* * Validate under the lock as this could have raced against * another thread ending up with the same ID, which is * highly unlikely, but possible. */ if (!posix_timer_hashed(bucket, sig, id)) { /* * Set the timer ID and the signal pointer to make * it identifiable in the hash table. The signal * pointer has bit 0 set to indicate that it is not * yet fully initialized. posix_timer_hashed() * masks this bit out, but the syscall lookup fails * to match due to it being set. This guarantees * that there can't be duplicate timer IDs handed * out. */ timer->it_id = (timer_t)id; timer->it_signal = (struct signal_struct *)((unsigned long)sig | 1UL); hlist_add_head_rcu(&timer->t_hash, &bucket->head); return true; } } return false; } static int posix_timer_add(struct k_itimer *timer, int req_id) { struct signal_struct *sig = current->signal; if (unlikely(req_id != TIMER_ANY_ID)) { if (!posix_timer_add_at(timer, sig, req_id)) return -EBUSY; /* * Move the ID counter past the requested ID, so that after * switching back to normal mode the IDs are outside of the * exact allocated region. That avoids ID collisions on the * next regular timer_create() invocations. */ atomic_set(&sig->next_posix_timer_id, req_id + 1); return req_id; } for (unsigned int cnt = 0; cnt <= INT_MAX; cnt++) { /* Get the next timer ID and clamp it to positive space */ unsigned int id = atomic_fetch_inc(&sig->next_posix_timer_id) & INT_MAX; if (posix_timer_add_at(timer, sig, id)) return id; cond_resched(); } /* POSIX return code when no timer ID could be allocated */ return -EAGAIN; } static int posix_get_realtime_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_real_ts64(tp); return 0; } static ktime_t posix_get_realtime_ktime(clockid_t which_clock) { return ktime_get_real(); } static int posix_clock_realtime_set(const clockid_t which_clock, const struct timespec64 *tp) { return do_sys_settimeofday64(tp, NULL); } static int posix_clock_realtime_adj(const clockid_t which_clock, struct __kernel_timex *t) { return do_adjtimex(t); } static int posix_get_monotonic_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_ts64(tp); timens_add_monotonic(tp); return 0; } static ktime_t posix_get_monotonic_ktime(clockid_t which_clock) { return ktime_get(); } static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp) { ktime_get_raw_ts64(tp); timens_add_monotonic(tp); return 0; } static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec64 *tp) { ktime_get_coarse_real_ts64(tp); return 0; } static int posix_get_monotonic_coarse(clockid_t which_clock, struct timespec64 *tp) { ktime_get_coarse_ts64(tp); timens_add_monotonic(tp); return 0; } static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *tp) { *tp = ktime_to_timespec64(KTIME_LOW_RES); return 0; } static int posix_get_boottime_timespec(const clockid_t which_clock, struct timespec64 *tp) { ktime_get_boottime_ts64(tp); 
timens_add_boottime(tp); return 0; } static ktime_t posix_get_boottime_ktime(const clockid_t which_clock) { return ktime_get_boottime(); } static int posix_get_tai_timespec(clockid_t which_clock, struct timespec64 *tp) { ktime_get_clocktai_ts64(tp); return 0; } static ktime_t posix_get_tai_ktime(clockid_t which_clock) { return ktime_get_clocktai(); } static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp) { tp->tv_sec = 0; tp->tv_nsec = hrtimer_resolution; return 0; } /* * The siginfo si_overrun field and the return value of timer_getoverrun(2) * are of type int. Clamp the overrun value to INT_MAX */ static inline int timer_overrun_to_int(struct k_itimer *timr) { if (timr->it_overrun_last > (s64)INT_MAX) return INT_MAX; return (int)timr->it_overrun_last; } static void common_hrtimer_rearm(struct k_itimer *timr) { struct hrtimer *timer = &timr->it.real.timer; timr->it_overrun += hrtimer_forward_now(timer, timr->it_interval); hrtimer_restart(timer); } static bool __posixtimer_deliver_signal(struct kernel_siginfo *info, struct k_itimer *timr) { guard(spinlock)(&timr->it_lock); /* * Check if the timer is still alive or whether it got modified * since the signal was queued. In either case, don't rearm and * drop the signal. */ if (timr->it_signal_seq != timr->it_sigqueue_seq || WARN_ON_ONCE(!posixtimer_valid(timr))) return false; if (!timr->it_interval || WARN_ON_ONCE(timr->it_status != POSIX_TIMER_REQUEUE_PENDING)) return true; timr->kclock->timer_rearm(timr); timr->it_status = POSIX_TIMER_ARMED; timr->it_overrun_last = timr->it_overrun; timr->it_overrun = -1LL; ++timr->it_signal_seq; info->si_overrun = timer_overrun_to_int(timr); return true; } /* * This function is called from the signal delivery code. It decides * whether the signal should be dropped and rearms interval timers. The * timer can be unconditionally accessed as there is a reference held on * it. */ bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq) { struct k_itimer *timr = container_of(timer_sigq, struct k_itimer, sigq); bool ret; /* * Release siglock to ensure proper locking order versus * timr::it_lock. Keep interrupts disabled. */ spin_unlock(&current->sighand->siglock); ret = __posixtimer_deliver_signal(info, timr); /* Drop the reference which was acquired when the signal was queued */ posixtimer_putref(timr); spin_lock(&current->sighand->siglock); return ret; } void posix_timer_queue_signal(struct k_itimer *timr) { lockdep_assert_held(&timr->it_lock); if (!posixtimer_valid(timr)) return; timr->it_status = timr->it_interval ? POSIX_TIMER_REQUEUE_PENDING : POSIX_TIMER_DISARMED; posixtimer_send_sigqueue(timr); } /* * This function gets called when a POSIX.1b interval timer expires from * the HRTIMER interrupt (soft interrupt on RT kernels). * * Handles CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME and CLOCK_TAI * based timers. 
*/ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) { struct k_itimer *timr = container_of(timer, struct k_itimer, it.real.timer); guard(spinlock_irqsave)(&timr->it_lock); posix_timer_queue_signal(timr); return HRTIMER_NORESTART; } long posixtimer_create_prctl(unsigned long ctrl) { switch (ctrl) { case PR_TIMER_CREATE_RESTORE_IDS_OFF: current->signal->timer_create_restore_ids = 0; return 0; case PR_TIMER_CREATE_RESTORE_IDS_ON: current->signal->timer_create_restore_ids = 1; return 0; case PR_TIMER_CREATE_RESTORE_IDS_GET: return current->signal->timer_create_restore_ids; } return -EINVAL; } static struct pid *good_sigevent(sigevent_t * event) { struct pid *pid = task_tgid(current); struct task_struct *rtn; switch (event->sigev_notify) { case SIGEV_SIGNAL | SIGEV_THREAD_ID: pid = find_vpid(event->sigev_notify_thread_id); rtn = pid_task(pid, PIDTYPE_PID); if (!rtn || !same_thread_group(rtn, current)) return NULL; fallthrough; case SIGEV_SIGNAL: case SIGEV_THREAD: if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX) return NULL; fallthrough; case SIGEV_NONE: return pid; default: return NULL; } } static struct k_itimer *alloc_posix_timer(void) { struct k_itimer *tmr; if (unlikely(!posix_timers_cache)) return NULL; tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL); if (!tmr) return tmr; if (unlikely(!posixtimer_init_sigqueue(&tmr->sigq))) { kmem_cache_free(posix_timers_cache, tmr); return NULL; } rcuref_init(&tmr->rcuref, 1); return tmr; } void posixtimer_free_timer(struct k_itimer *tmr) { put_pid(tmr->it_pid); if (tmr->sigq.ucounts) dec_rlimit_put_ucounts(tmr->sigq.ucounts, UCOUNT_RLIMIT_SIGPENDING); kfree_rcu(tmr, rcu); } static void posix_timer_unhash_and_free(struct k_itimer *tmr) { struct timer_hash_bucket *bucket = hash_bucket(posix_sig_owner(tmr), tmr->it_id); scoped_guard (spinlock, &bucket->lock) hlist_del_rcu(&tmr->t_hash); posixtimer_putref(tmr); } static int common_timer_create(struct k_itimer *new_timer) { hrtimer_setup(&new_timer->it.real.timer, posix_timer_fn, new_timer->it_clock, 0); return 0; } /* Create a POSIX.1b interval timer. */ static int do_timer_create(clockid_t which_clock, struct sigevent *event, timer_t __user *created_timer_id) { const struct k_clock *kc = clockid_to_kclock(which_clock); timer_t req_id = TIMER_ANY_ID; struct k_itimer *new_timer; int error, new_timer_id; if (!kc) return -EINVAL; if (!kc->timer_create) return -EOPNOTSUPP; /* Special case for CRIU to restore timers with a given timer ID. */ if (unlikely(current->signal->timer_create_restore_ids)) { if (copy_from_user(&req_id, created_timer_id, sizeof(req_id))) return -EFAULT; /* Valid IDs are 0..INT_MAX */ if ((unsigned int)req_id > INT_MAX) return -EINVAL; } new_timer = alloc_posix_timer(); if (unlikely(!new_timer)) return -EAGAIN; spin_lock_init(&new_timer->it_lock); /* * Add the timer to the hash table. The timer is not yet valid * after insertion, but has a unique ID allocated. 
*/ new_timer_id = posix_timer_add(new_timer, req_id); if (new_timer_id < 0) { posixtimer_free_timer(new_timer); return new_timer_id; } new_timer->it_clock = which_clock; new_timer->kclock = kc; new_timer->it_overrun = -1LL; if (event) { scoped_guard (rcu) new_timer->it_pid = get_pid(good_sigevent(event)); if (!new_timer->it_pid) { error = -EINVAL; goto out; } new_timer->it_sigev_notify = event->sigev_notify; new_timer->sigq.info.si_signo = event->sigev_signo; new_timer->sigq.info.si_value = event->sigev_value; } else { new_timer->it_sigev_notify = SIGEV_SIGNAL; new_timer->sigq.info.si_signo = SIGALRM; new_timer->sigq.info.si_value.sival_int = new_timer->it_id; new_timer->it_pid = get_pid(task_tgid(current)); } if (new_timer->it_sigev_notify & SIGEV_THREAD_ID) new_timer->it_pid_type = PIDTYPE_PID; else new_timer->it_pid_type = PIDTYPE_TGID; new_timer->sigq.info.si_tid = new_timer->it_id; new_timer->sigq.info.si_code = SI_TIMER; if (copy_to_user(created_timer_id, &new_timer_id, sizeof (new_timer_id))) { error = -EFAULT; goto out; } /* * After successful copy out, the timer ID is visible to user space * now but not yet valid because new_timer::signal low order bit is 1. * * Complete the initialization with the clock specific create * callback. */ error = kc->timer_create(new_timer); if (error) goto out; /* * timer::it_lock ensures that __lock_timer() observes a fully * initialized timer when it observes a valid timer::it_signal. * * sighand::siglock is required to protect signal::posix_timers. */ scoped_guard (spinlock_irq, &new_timer->it_lock) { guard(spinlock)(&current->sighand->siglock); /* * new_timer::it_signal contains the signal pointer with * bit 0 set, which makes it invalid for syscall operations. * Store the unmodified signal pointer to make it valid. */ WRITE_ONCE(new_timer->it_signal, current->signal); hlist_add_head_rcu(&new_timer->list, &current->signal->posix_timers); } /* * After unlocking @new_timer is subject to concurrent removal and * cannot be touched anymore */ return 0; out: posix_timer_unhash_and_free(new_timer); return error; } SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, struct sigevent __user *, timer_event_spec, timer_t __user *, created_timer_id) { if (timer_event_spec) { sigevent_t event; if (copy_from_user(&event, timer_event_spec, sizeof (event))) return -EFAULT; return do_timer_create(which_clock, &event, created_timer_id); } return do_timer_create(which_clock, NULL, created_timer_id); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE3(timer_create, clockid_t, which_clock, struct compat_sigevent __user *, timer_event_spec, timer_t __user *, created_timer_id) { if (timer_event_spec) { sigevent_t event; if (get_compat_sigevent(&event, timer_event_spec)) return -EFAULT; return do_timer_create(which_clock, &event, created_timer_id); } return do_timer_create(which_clock, NULL, created_timer_id); } #endif static struct k_itimer *__lock_timer(timer_t timer_id) { struct k_itimer *timr; /* * timer_t could be any type >= int and we want to make sure any * @timer_id outside positive int range fails lookup. */ if ((unsigned long long)timer_id > INT_MAX) return NULL; /* * The hash lookup and the timers are RCU protected. * * Timers are added to the hash in invalid state where * timr::it_signal is marked invalid. timer::it_signal is only set * after the rest of the initialization succeeded. 
* * Timer destruction happens in steps: * 1) Set timr::it_signal marked invalid with timr::it_lock held * 2) Release timr::it_lock * 3) Remove from the hash under hash_lock * 4) Put the reference count. * * The reference count might not drop to zero if timr::sigq is * queued. In that case the signal delivery or flush will put the * last reference count. * * When the reference count reaches zero, the timer is scheduled * for RCU removal after the grace period. * * Holding rcu_read_lock() across the lookup ensures that * the timer cannot be freed. * * The lookup validates locklessly that timr::it_signal == * current::it_signal and timr::it_id == @timer_id. timr::it_id * can't change, but timr::it_signal can become invalid during * destruction, which makes the locked check fail. */ guard(rcu)(); timr = posix_timer_by_id(timer_id); if (timr) { spin_lock_irq(&timr->it_lock); /* * Validate under timr::it_lock that timr::it_signal is * still valid. Pairs with #1 above. */ if (timr->it_signal == current->signal) return timr; spin_unlock_irq(&timr->it_lock); } return NULL; } static ktime_t common_hrtimer_remaining(struct k_itimer *timr, ktime_t now) { struct hrtimer *timer = &timr->it.real.timer; return __hrtimer_expires_remaining_adjusted(timer, now); } static s64 common_hrtimer_forward(struct k_itimer *timr, ktime_t now) { struct hrtimer *timer = &timr->it.real.timer; return hrtimer_forward(timer, now, timr->it_interval); } /* * Get the time remaining on a POSIX.1b interval timer. * * Two issues to handle here: * * 1) The timer has a requeue pending. The return value must appear as * if the timer has been requeued right now. * * 2) The timer is a SIGEV_NONE timer. These timers are never enqueued * into the hrtimer queue and therefore never expired. Emulate expiry * here taking #1 into account. */ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) { const struct k_clock *kc = timr->kclock; ktime_t now, remaining, iv; bool sig_none; sig_none = timr->it_sigev_notify == SIGEV_NONE; iv = timr->it_interval; /* interval timer ? */ if (iv) { cur_setting->it_interval = ktime_to_timespec64(iv); } else if (timr->it_status == POSIX_TIMER_DISARMED) { /* * SIGEV_NONE oneshot timers are never queued and therefore * timr->it_status is always DISARMED. The check below * vs. remaining time will handle this case. * * For all other timers there is nothing to update here, so * return. */ if (!sig_none) return; } now = kc->clock_get_ktime(timr->it_clock); /* * If this is an interval timer and either has requeue pending or * is a SIGEV_NONE timer move the expiry time forward by intervals, * so expiry is > now. */ if (iv && timr->it_status != POSIX_TIMER_ARMED) timr->it_overrun += kc->timer_forward(timr, now); remaining = kc->timer_remaining(timr, now); /* * As @now is retrieved before a possible timer_forward() and * cannot be reevaluated by the compiler @remaining is based on the * same @now value. Therefore @remaining is consistent vs. @now. * * Consequently all interval timers, i.e. @iv > 0, cannot have a * remaining time <= 0 because timer_forward() guarantees to move * them forward so that the next timer expiry is > @now. */ if (remaining <= 0) { /* * A single shot SIGEV_NONE timer must return 0, when it is * expired! Timers which have a real signal delivery mode * must return a remaining time greater than 0 because the * signal has not yet been delivered. 
*/ if (!sig_none) cur_setting->it_value.tv_nsec = 1; } else { cur_setting->it_value = ktime_to_timespec64(remaining); } } static int do_timer_gettime(timer_t timer_id, struct itimerspec64 *setting) { memset(setting, 0, sizeof(*setting)); scoped_timer_get_or_fail(timer_id) scoped_timer->kclock->timer_get(scoped_timer, setting); return 0; } /* Get the time remaining on a POSIX.1b interval timer. */ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, struct __kernel_itimerspec __user *, setting) { struct itimerspec64 cur_setting; int ret = do_timer_gettime(timer_id, &cur_setting); if (!ret) { if (put_itimerspec64(&cur_setting, setting)) ret = -EFAULT; } return ret; } #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE2(timer_gettime32, timer_t, timer_id, struct old_itimerspec32 __user *, setting) { struct itimerspec64 cur_setting; int ret = do_timer_gettime(timer_id, &cur_setting); if (!ret) { if (put_old_itimerspec32(&cur_setting, setting)) ret = -EFAULT; } return ret; } #endif /** * sys_timer_getoverrun - Get the number of overruns of a POSIX.1b interval timer * @timer_id: The timer ID which identifies the timer * * The "overrun count" of a timer is one plus the number of expiration * intervals which have elapsed between the first expiry, which queues the * signal and the actual signal delivery. On signal delivery the "overrun * count" is calculated and cached, so it can be returned directly here. * * As this is relative to the last queued signal the returned overrun count * is meaningless outside of the signal delivery path and even there it * does not accurately reflect the current state when user space evaluates * it. * * Returns: * -EINVAL @timer_id is invalid * 1..INT_MAX The number of overruns related to the last delivered signal */ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) { scoped_timer_get_or_fail(timer_id) return timer_overrun_to_int(scoped_timer); } static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires, bool absolute, bool sigev_none) { struct hrtimer *timer = &timr->it.real.timer; enum hrtimer_mode mode; mode = absolute ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL; /* * Posix magic: Relative CLOCK_REALTIME timers are not affected by * clock modifications, so they become CLOCK_MONOTONIC based under the * hood. See hrtimer_setup(). Update timr->kclock, so the generic * functions which use timr->kclock->clock_get_*() work. * * Note: it_clock stays unmodified, because the next timer_set() might * use ABSTIME, so it needs to switch back. */ if (timr->it_clock == CLOCK_REALTIME) timr->kclock = absolute ? &clock_realtime : &clock_monotonic; hrtimer_setup(&timr->it.real.timer, posix_timer_fn, timr->it_clock, mode); if (!absolute) expires = ktime_add_safe(expires, hrtimer_cb_get_time(timer)); hrtimer_set_expires(timer, expires); if (!sigev_none) hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } static int common_hrtimer_try_to_cancel(struct k_itimer *timr) { return hrtimer_try_to_cancel(&timr->it.real.timer); } static void common_timer_wait_running(struct k_itimer *timer) { hrtimer_cancel_wait_running(&timer->it.real.timer); } /* * On PREEMPT_RT this prevents priority inversion and a potential livelock * against the ksoftirqd thread in case that ksoftirqd gets preempted while * executing a hrtimer callback. * * See the comments in hrtimer_cancel_wait_running(). For PREEMPT_RT=n this * just results in a cpu_relax(). * * For POSIX CPU timers with CONFIG_POSIX_CPU_TIMERS_TASK_WORK=n this is * just a cpu_relax(). 
With CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y this * prevents spinning on an eventually scheduled out task and a livelock * when the task which tries to delete or disarm the timer has preempted * the task which runs the expiry in task work context. */ static void timer_wait_running(struct k_itimer *timer) { /* * kc->timer_wait_running() might drop RCU lock. So @timer * cannot be touched anymore after the function returns! */ timer->kclock->timer_wait_running(timer); } /* * Set up the new interval and reset the signal delivery data */ void posix_timer_set_common(struct k_itimer *timer, struct itimerspec64 *new_setting) { if (new_setting->it_value.tv_sec || new_setting->it_value.tv_nsec) timer->it_interval = timespec64_to_ktime(new_setting->it_interval); else timer->it_interval = 0; /* Reset overrun accounting */ timer->it_overrun_last = 0; timer->it_overrun = -1LL; } /* Set a POSIX.1b interval timer. */ int common_timer_set(struct k_itimer *timr, int flags, struct itimerspec64 *new_setting, struct itimerspec64 *old_setting) { const struct k_clock *kc = timr->kclock; bool sigev_none; ktime_t expires; if (old_setting) common_timer_get(timr, old_setting); /* * Careful here. On SMP systems the timer expiry function could be * active and spinning on timr->it_lock. */ if (kc->timer_try_to_cancel(timr) < 0) return TIMER_RETRY; timr->it_status = POSIX_TIMER_DISARMED; posix_timer_set_common(timr, new_setting); /* Keep timer disarmed when it_value is zero */ if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) return 0; expires = timespec64_to_ktime(new_setting->it_value); if (flags & TIMER_ABSTIME) expires = timens_ktime_to_host(timr->it_clock, expires); sigev_none = timr->it_sigev_notify == SIGEV_NONE; kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none); if (!sigev_none) timr->it_status = POSIX_TIMER_ARMED; return 0; } static int do_timer_settime(timer_t timer_id, int tmr_flags, struct itimerspec64 *new_spec64, struct itimerspec64 *old_spec64) { if (!timespec64_valid(&new_spec64->it_interval) || !timespec64_valid(&new_spec64->it_value)) return -EINVAL; if (old_spec64) memset(old_spec64, 0, sizeof(*old_spec64)); for (; ; old_spec64 = NULL) { struct k_itimer *timr; scoped_timer_get_or_fail(timer_id) { timr = scoped_timer; if (old_spec64) old_spec64->it_interval = ktime_to_timespec64(timr->it_interval); /* Prevent signal delivery and rearming. */ timr->it_signal_seq++; int ret = timr->kclock->timer_set(timr, tmr_flags, new_spec64, old_spec64); if (ret != TIMER_RETRY) return ret; /* Protect the timer from being freed when leaving the lock scope */ rcu_read_lock(); } timer_wait_running(timr); rcu_read_unlock(); } } /* Set a POSIX.1b interval timer */ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, const struct __kernel_itimerspec __user *, new_setting, struct __kernel_itimerspec __user *, old_setting) { struct itimerspec64 new_spec, old_spec, *rtn; int error = 0; if (!new_setting) return -EINVAL; if (get_itimerspec64(&new_spec, new_setting)) return -EFAULT; rtn = old_setting ? &old_spec : NULL; error = do_timer_settime(timer_id, flags, &new_spec, rtn); if (!error && old_setting) { if (put_itimerspec64(&old_spec, old_setting)) error = -EFAULT; } return error; } #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE4(timer_settime32, timer_t, timer_id, int, flags, struct old_itimerspec32 __user *, new, struct old_itimerspec32 __user *, old) { struct itimerspec64 new_spec, old_spec; struct itimerspec64 *rtn = old ? 
&old_spec : NULL; int error = 0; if (!new) return -EINVAL; if (get_old_itimerspec32(&new_spec, new)) return -EFAULT; error = do_timer_settime(timer_id, flags, &new_spec, rtn); if (!error && old) { if (put_old_itimerspec32(&old_spec, old)) error = -EFAULT; } return error; } #endif int common_timer_del(struct k_itimer *timer) { const struct k_clock *kc = timer->kclock; if (kc->timer_try_to_cancel(timer) < 0) return TIMER_RETRY; timer->it_status = POSIX_TIMER_DISARMED; return 0; } /* * If the deleted timer is on the ignored list, remove it and * drop the associated reference. */ static inline void posix_timer_cleanup_ignored(struct k_itimer *tmr) { if (!hlist_unhashed(&tmr->ignored_list)) { hlist_del_init(&tmr->ignored_list); posixtimer_putref(tmr); } } static void posix_timer_delete(struct k_itimer *timer) { /* * Invalidate the timer, remove it from the linked list and remove * it from the ignored list if pending. * * The invalidation must be written with siglock held so that the * signal code observes the invalidated timer::it_signal in * do_sigaction(), which prevents it from moving a pending signal * of a deleted timer to the ignore list. * * The invalidation also prevents signal queueing, signal delivery * and therefore rearming from the signal delivery path. * * A concurrent lookup can still find the timer in the hash, but it * will check timer::it_signal with timer::it_lock held and observe * bit 0 set, which invalidates it. That also prevents the timer ID * from being handed out before this timer is completely gone. */ timer->it_signal_seq++; scoped_guard (spinlock, &current->sighand->siglock) { unsigned long sig = (unsigned long)timer->it_signal | 1UL; WRITE_ONCE(timer->it_signal, (struct signal_struct *)sig); hlist_del_rcu(&timer->list); posix_timer_cleanup_ignored(timer); } while (timer->kclock->timer_del(timer) == TIMER_RETRY) { guard(rcu)(); spin_unlock_irq(&timer->it_lock); timer_wait_running(timer); spin_lock_irq(&timer->it_lock); } } /* Delete a POSIX.1b interval timer. */ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id) { struct k_itimer *timer; scoped_timer_get_or_fail(timer_id) { timer = scoped_timer; posix_timer_delete(timer); } /* Remove it from the hash, which frees up the timer ID */ posix_timer_unhash_and_free(timer); return 0; } /* * Invoked from do_exit() when the last thread of a thread group exits. * At that point no other task can access the timers of the dying * task anymore. */ void exit_itimers(struct task_struct *tsk) { struct hlist_head timers; struct hlist_node *next; struct k_itimer *timer; /* Clear restore mode for exec() */ tsk->signal->timer_create_restore_ids = 0; if (hlist_empty(&tsk->signal->posix_timers)) return; /* Protect against concurrent read via /proc/$PID/timers */ scoped_guard (spinlock_irq, &tsk->sighand->siglock) hlist_move_list(&tsk->signal->posix_timers, &timers); /* The timers are not longer accessible via tsk::signal */ hlist_for_each_entry_safe(timer, next, &timers, list) { scoped_guard (spinlock_irq, &timer->it_lock) posix_timer_delete(timer); posix_timer_unhash_and_free(timer); cond_resched(); } /* * There should be no timers on the ignored list. itimer_delete() has * mopped them up. 
*/ if (!WARN_ON_ONCE(!hlist_empty(&tsk->signal->ignored_posix_timers))) return; hlist_move_list(&tsk->signal->ignored_posix_timers, &timers); while (!hlist_empty(&timers)) { posix_timer_cleanup_ignored(hlist_entry(timers.first, struct k_itimer, ignored_list)); } } SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, const struct __kernel_timespec __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 new_tp; if (!kc || !kc->clock_set) return -EINVAL; if (get_timespec64(&new_tp, tp)) return -EFAULT; /* * Permission checks have to be done inside the clock specific * setter callback. */ return kc->clock_set(which_clock, &new_tp); } SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, struct __kernel_timespec __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 kernel_tp; int error; if (!kc) return -EINVAL; error = kc->clock_get_timespec(which_clock, &kernel_tp); if (!error && put_timespec64(&kernel_tp, tp)) error = -EFAULT; return error; } int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx) { const struct k_clock *kc = clockid_to_kclock(which_clock); if (!kc) return -EINVAL; if (!kc->clock_adj) return -EOPNOTSUPP; return kc->clock_adj(which_clock, ktx); } SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, struct __kernel_timex __user *, utx) { struct __kernel_timex ktx; int err; if (copy_from_user(&ktx, utx, sizeof(ktx))) return -EFAULT; err = do_clock_adjtime(which_clock, &ktx); if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx))) return -EFAULT; return err; } /** * sys_clock_getres - Get the resolution of a clock * @which_clock: The clock to get the resolution for * @tp: Pointer to a a user space timespec64 for storage * * POSIX defines: * * "The clock_getres() function shall return the resolution of any * clock. Clock resolutions are implementation-defined and cannot be set by * a process. If the argument res is not NULL, the resolution of the * specified clock shall be stored in the location pointed to by res. If * res is NULL, the clock resolution is not returned. If the time argument * of clock_settime() is not a multiple of res, then the value is truncated * to a multiple of res." * * Due to the various hardware constraints the real resolution can vary * wildly and even change during runtime when the underlying devices are * replaced. The kernel also can use hardware devices with different * resolutions for reading the time and for arming timers. * * The kernel therefore deviates from the POSIX spec in various aspects: * * 1) The resolution returned to user space * * For CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME, CLOCK_TAI, * CLOCK_REALTIME_ALARM, CLOCK_BOOTTIME_ALAREM and CLOCK_MONOTONIC_RAW * the kernel differentiates only two cases: * * I) Low resolution mode: * * When high resolution timers are disabled at compile or runtime * the resolution returned is nanoseconds per tick, which represents * the precision at which timers expire. * * II) High resolution mode: * * When high resolution timers are enabled the resolution returned * is always one nanosecond independent of the actual resolution of * the underlying hardware devices. * * For CLOCK_*_ALARM the actual resolution depends on system * state. When system is running the resolution is the same as the * resolution of the other clocks. 
During suspend the actual * resolution is the resolution of the underlying RTC device which * might be way less precise than the clockevent device used during * running state. * * For CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE the resolution * returned is always nanoseconds per tick. * * For CLOCK_PROCESS_CPUTIME and CLOCK_THREAD_CPUTIME the resolution * returned is always one nanosecond under the assumption that the * underlying scheduler clock has a better resolution than nanoseconds * per tick. * * For dynamic POSIX clocks (PTP devices) the resolution returned is * always one nanosecond. * * 2) Affect on sys_clock_settime() * * The kernel does not truncate the time which is handed in to * sys_clock_settime(). The kernel internal timekeeping is always using * nanoseconds precision independent of the clocksource device which is * used to read the time from. The resolution of that device only * affects the precision of the time returned by sys_clock_gettime(). * * Returns: * 0 Success. @tp contains the resolution * -EINVAL @which_clock is not a valid clock ID * -EFAULT Copying the resolution to @tp faulted * -ENODEV Dynamic POSIX clock is not backed by a device * -EOPNOTSUPP Dynamic POSIX clock does not support getres() */ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct __kernel_timespec __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 rtn_tp; int error; if (!kc) return -EINVAL; error = kc->clock_getres(which_clock, &rtn_tp); if (!error && tp && put_timespec64(&rtn_tp, tp)) error = -EFAULT; return error; } #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE2(clock_settime32, clockid_t, which_clock, struct old_timespec32 __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 ts; if (!kc || !kc->clock_set) return -EINVAL; if (get_old_timespec32(&ts, tp)) return -EFAULT; return kc->clock_set(which_clock, &ts); } SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock, struct old_timespec32 __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 ts; int err; if (!kc) return -EINVAL; err = kc->clock_get_timespec(which_clock, &ts); if (!err && put_old_timespec32(&ts, tp)) err = -EFAULT; return err; } SYSCALL_DEFINE2(clock_adjtime32, clockid_t, which_clock, struct old_timex32 __user *, utp) { struct __kernel_timex ktx; int err; err = get_old_timex32(&ktx, utp); if (err) return err; err = do_clock_adjtime(which_clock, &ktx); if (err >= 0 && put_old_timex32(utp, &ktx)) return -EFAULT; return err; } SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock, struct old_timespec32 __user *, tp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 ts; int err; if (!kc) return -EINVAL; err = kc->clock_getres(which_clock, &ts); if (!err && tp && put_old_timespec32(&ts, tp)) return -EFAULT; return err; } #endif /* * sys_clock_nanosleep() for CLOCK_REALTIME and CLOCK_TAI */ static int common_nsleep(const clockid_t which_clock, int flags, const struct timespec64 *rqtp) { ktime_t texp = timespec64_to_ktime(*rqtp); return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, which_clock); } /* * sys_clock_nanosleep() for CLOCK_MONOTONIC and CLOCK_BOOTTIME * * Absolute nanosleeps for these clocks are time-namespace adjusted. 
*/ static int common_nsleep_timens(const clockid_t which_clock, int flags, const struct timespec64 *rqtp) { ktime_t texp = timespec64_to_ktime(*rqtp); if (flags & TIMER_ABSTIME) texp = timens_ktime_to_host(which_clock, texp); return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, which_clock); } SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, const struct __kernel_timespec __user *, rqtp, struct __kernel_timespec __user *, rmtp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 t; if (!kc) return -EINVAL; if (!kc->nsleep) return -EOPNOTSUPP; if (get_timespec64(&t, rqtp)) return -EFAULT; if (!timespec64_valid(&t)) return -EINVAL; if (flags & TIMER_ABSTIME) rmtp = NULL; current->restart_block.fn = do_no_restart_syscall; current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; current->restart_block.nanosleep.rmtp = rmtp; return kc->nsleep(which_clock, flags, &t); } #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags, struct old_timespec32 __user *, rqtp, struct old_timespec32 __user *, rmtp) { const struct k_clock *kc = clockid_to_kclock(which_clock); struct timespec64 t; if (!kc) return -EINVAL; if (!kc->nsleep) return -EOPNOTSUPP; if (get_old_timespec32(&t, rqtp)) return -EFAULT; if (!timespec64_valid(&t)) return -EINVAL; if (flags & TIMER_ABSTIME) rmtp = NULL; current->restart_block.fn = do_no_restart_syscall; current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; current->restart_block.nanosleep.compat_rmtp = rmtp; return kc->nsleep(which_clock, flags, &t); } #endif static const struct k_clock clock_realtime = { .clock_getres = posix_get_hrtimer_res, .clock_get_timespec = posix_get_realtime_timespec, .clock_get_ktime = posix_get_realtime_ktime, .clock_set = posix_clock_realtime_set, .clock_adj = posix_clock_realtime_adj, .nsleep = common_nsleep, .timer_create = common_timer_create, .timer_set = common_timer_set, .timer_get = common_timer_get, .timer_del = common_timer_del, .timer_rearm = common_hrtimer_rearm, .timer_forward = common_hrtimer_forward, .timer_remaining = common_hrtimer_remaining, .timer_try_to_cancel = common_hrtimer_try_to_cancel, .timer_wait_running = common_timer_wait_running, .timer_arm = common_hrtimer_arm, }; static const struct k_clock clock_monotonic = { .clock_getres = posix_get_hrtimer_res, .clock_get_timespec = posix_get_monotonic_timespec, .clock_get_ktime = posix_get_monotonic_ktime, .nsleep = common_nsleep_timens, .timer_create = common_timer_create, .timer_set = common_timer_set, .timer_get = common_timer_get, .timer_del = common_timer_del, .timer_rearm = common_hrtimer_rearm, .timer_forward = common_hrtimer_forward, .timer_remaining = common_hrtimer_remaining, .timer_try_to_cancel = common_hrtimer_try_to_cancel, .timer_wait_running = common_timer_wait_running, .timer_arm = common_hrtimer_arm, }; static const struct k_clock clock_monotonic_raw = { .clock_getres = posix_get_hrtimer_res, .clock_get_timespec = posix_get_monotonic_raw, }; static const struct k_clock clock_realtime_coarse = { .clock_getres = posix_get_coarse_res, .clock_get_timespec = posix_get_realtime_coarse, }; static const struct k_clock clock_monotonic_coarse = { .clock_getres = posix_get_coarse_res, .clock_get_timespec = posix_get_monotonic_coarse, }; static const struct k_clock clock_tai = { .clock_getres = posix_get_hrtimer_res, .clock_get_ktime = posix_get_tai_ktime, .clock_get_timespec = posix_get_tai_timespec, .nsleep 
= common_nsleep, .timer_create = common_timer_create, .timer_set = common_timer_set, .timer_get = common_timer_get, .timer_del = common_timer_del, .timer_rearm = common_hrtimer_rearm, .timer_forward = common_hrtimer_forward, .timer_remaining = common_hrtimer_remaining, .timer_try_to_cancel = common_hrtimer_try_to_cancel, .timer_wait_running = common_timer_wait_running, .timer_arm = common_hrtimer_arm, }; static const struct k_clock clock_boottime = { .clock_getres = posix_get_hrtimer_res, .clock_get_ktime = posix_get_boottime_ktime, .clock_get_timespec = posix_get_boottime_timespec, .nsleep = common_nsleep_timens, .timer_create = common_timer_create, .timer_set = common_timer_set, .timer_get = common_timer_get, .timer_del = common_timer_del, .timer_rearm = common_hrtimer_rearm, .timer_forward = common_hrtimer_forward, .timer_remaining = common_hrtimer_remaining, .timer_try_to_cancel = common_hrtimer_try_to_cancel, .timer_wait_running = common_timer_wait_running, .timer_arm = common_hrtimer_arm, }; static const struct k_clock * const posix_clocks[] = { [CLOCK_REALTIME] = &clock_realtime, [CLOCK_MONOTONIC] = &clock_monotonic, [CLOCK_PROCESS_CPUTIME_ID] = &clock_process, [CLOCK_THREAD_CPUTIME_ID] = &clock_thread, [CLOCK_MONOTONIC_RAW] = &clock_monotonic_raw, [CLOCK_REALTIME_COARSE] = &clock_realtime_coarse, [CLOCK_MONOTONIC_COARSE] = &clock_monotonic_coarse, [CLOCK_BOOTTIME] = &clock_boottime, [CLOCK_REALTIME_ALARM] = &alarm_clock, [CLOCK_BOOTTIME_ALARM] = &alarm_clock, [CLOCK_TAI] = &clock_tai, #ifdef CONFIG_POSIX_AUX_CLOCKS [CLOCK_AUX ... CLOCK_AUX_LAST] = &clock_aux, #endif }; static const struct k_clock *clockid_to_kclock(const clockid_t id) { clockid_t idx = id; if (id < 0) { return (id & CLOCKFD_MASK) == CLOCKFD ? &clock_posix_dynamic : &clock_posix_cpu; } if (id >= ARRAY_SIZE(posix_clocks)) return NULL; return posix_clocks[array_index_nospec(idx, ARRAY_SIZE(posix_clocks))]; } static int __init posixtimer_init(void) { unsigned long i, size; unsigned int shift; posix_timers_cache = kmem_cache_create("posix_timers_cache", sizeof(struct k_itimer), __alignof__(struct k_itimer), SLAB_ACCOUNT, NULL); if (IS_ENABLED(CONFIG_BASE_SMALL)) size = 512; else size = roundup_pow_of_two(512 * num_possible_cpus()); timer_buckets = alloc_large_system_hash("posixtimers", sizeof(*timer_buckets), size, 0, 0, &shift, NULL, size, size); size = 1UL << shift; timer_hashmask = size - 1; for (i = 0; i < size; i++) { spin_lock_init(&timer_buckets[i].lock); INIT_HLIST_HEAD(&timer_buckets[i].head); } return 0; } core_initcall(posixtimer_init);
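
The kernel file above (kernel/time/posix-timers.c) implements the timer_create(), timer_settime(), timer_gettime(), timer_getoverrun() and timer_delete() syscalls together with the clock_* family. As a quick userspace sanity check of that API, here is a minimal sketch (not part of the kernel source; the program layout, the SIGRTMIN choice and the 100ms period are arbitrary):

/* Build: gcc -o timer_demo timer_demo.c   (older glibc additionally needs -lrt) */
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

int main(void)
{
	struct sigevent sev;
	struct itimerspec its;
	siginfo_t info;
	sigset_t set;
	timer_t tid;

	/* Block SIGRTMIN so expirations can be collected synchronously. */
	sigemptyset(&set);
	sigaddset(&set, SIGRTMIN);
	sigprocmask(SIG_BLOCK, &set, NULL);

	/* SIGEV_SIGNAL: queue SIGRTMIN to the process on every expiry. */
	memset(&sev, 0, sizeof(sev));
	sev.sigev_notify = SIGEV_SIGNAL;
	sev.sigev_signo = SIGRTMIN;
	sev.sigev_value.sival_ptr = &tid;

	if (timer_create(CLOCK_MONOTONIC, &sev, &tid))
		return perror("timer_create"), EXIT_FAILURE;

	/* 100ms initial expiry, then a 100ms period (it_interval != 0). */
	its.it_value.tv_sec = 0;
	its.it_value.tv_nsec = 100 * 1000 * 1000;
	its.it_interval = its.it_value;
	if (timer_settime(tid, 0, &its, NULL))
		return perror("timer_settime"), EXIT_FAILURE;

	for (int i = 0; i < 5; i++) {
		if (sigwaitinfo(&set, &info) < 0)
			return perror("sigwaitinfo"), EXIT_FAILURE;
		/* si_overrun is the cached per-delivery overrun count described above. */
		printf("tick %d: si_overrun=%d timer_getoverrun=%d\n",
		       i, info.si_overrun, timer_getoverrun(tid));
	}

	timer_delete(tid);
	return 0;
}

For checkpoint/restore the same sequence can be made deterministic: once the timer_create_restore_ids mode is switched on via the prctl() handled by posixtimer_create_prctl() above (PR_TIMER_CREATE_RESTORE_IDS_ON), the value stored in *created_timer_id before calling timer_create() is taken as the requested timer ID, which is the CRIU special case handled in do_timer_create() above.
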
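
The second file below is the TechnoTrend/Hauppauge DEC USB DVB driver (ttusb_dec). It declares a small PES-to-TS repacketizer, struct dvb_filter_pes2ts, whose callback receives one finished 188-byte TS packet per invocation. A hedged sketch of that callback contract, assuming the packets are simply pushed into the dvb-core software demux (my_dec and pes2ts_cb are illustrative names; dvb_dmx_swfilter_packets() is the standard dvb_demux helper):

#include <media/dvb_demux.h>

/* Illustrative only: the real driver carries its state in struct ttusb_dec. */
struct my_dec {
	struct dvb_demux demux;
	/* ... */
};

/*
 * Matches the driver's typedef:
 *   typedef int (dvb_filter_pes2ts_cb_t)(void *, unsigned char *);
 */
static int pes2ts_cb(void *priv, unsigned char *ts)
{
	struct my_dec *dec = priv;

	/* Hand exactly one 188-byte TS packet to the software filter. */
	dvb_dmx_swfilter_packets(&dec->demux, ts, 1);
	return 0;
}

For scale, the isochronous constants defined below work out to 4 buffers x 4 frames x 0x380 (896) bytes = 14336 bytes of streaming buffer feeding that path.
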
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TTUSB DEC Driver
 *
 * Copyright (C) 2003-2004 Alex Woods <linux-dvb@giblets.org>
 * IR support by Peter Beutner <p.beutner@gmx.net>
 */

#include <linux/list.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/usb.h>
#include <linux/interrupt.h>
#include <linux/firmware.h>
#include <linux/crc32.h>
#include <linux/init.h>
#include <linux/input.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>

#include <media/dmxdev.h>
#include <media/dvb_demux.h>
#include <media/dvb_frontend.h>
#include <media/dvb_net.h>
#include "ttusbdecfe.h"

static int debug;
static int output_pva;
static int enable_rc;

module_param(debug, int, 0644);
MODULE_PARM_DESC(debug, "Turn on/off debugging (default:off).");
module_param(output_pva, int, 0444);
MODULE_PARM_DESC(output_pva, "Output PVA from dvr device (default:off)");
module_param(enable_rc, int, 0644);
MODULE_PARM_DESC(enable_rc, "Turn on/off IR remote control(default: off)");

DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr);

#define dprintk	if (debug) printk

#define DRIVER_NAME	"TechnoTrend/Hauppauge DEC USB"

#define COMMAND_PIPE		0x03
#define RESULT_PIPE		0x04
#define IN_PIPE			0x08
#define OUT_PIPE		0x07
#define IRQ_PIPE		0x0A

#define COMMAND_PACKET_SIZE	0x3c
#define ARM_PACKET_SIZE		0x1000
#define IRQ_PACKET_SIZE		0x8

#define ISO_BUF_COUNT		0x04
#define FRAMES_PER_ISO_BUF	0x04
#define ISO_FRAME_SIZE		0x0380

#define MAX_PVA_LENGTH		6144

enum ttusb_dec_model {
	TTUSB_DEC2000T,
	TTUSB_DEC2540T,
	TTUSB_DEC3000S
};

enum ttusb_dec_packet_type {
	TTUSB_DEC_PACKET_PVA,
	TTUSB_DEC_PACKET_SECTION,
	TTUSB_DEC_PACKET_EMPTY
};

enum ttusb_dec_interface {
	TTUSB_DEC_INTERFACE_INITIAL,
	TTUSB_DEC_INTERFACE_IN,
	TTUSB_DEC_INTERFACE_OUT
};

typedef int (dvb_filter_pes2ts_cb_t) (void *, unsigned char *);

struct dvb_filter_pes2ts {
	unsigned char buf[188];
	unsigned char cc;
	dvb_filter_pes2ts_cb_t *cb;
	void *priv;
};

struct ttusb_dec {
	enum ttusb_dec_model	model;
	char			*model_name;
	char			*firmware_name;
	int			can_playback;

	/* DVB bits */
	struct dvb_adapter	adapter;
	struct dmxdev		dmxdev;
	struct dvb_demux	demux;
	struct dmx_frontend	frontend;
	struct dvb_net		dvb_net;
	struct dvb_frontend	*fe;

	u16			pid[DMX_PES_OTHER];

	/* USB bits */
	struct usb_device	*udev;
	u8			trans_count;

	unsigned int		command_pipe;
	unsigned int		result_pipe;
	unsigned int		in_pipe;
	unsigned int		out_pipe;
	unsigned int		irq_pipe;
	enum ttusb_dec_interface	interface;
	struct mutex		usb_mutex;

	void			*irq_buffer;
	struct urb		*irq_urb;
	dma_addr_t		irq_dma_handle;
	void			*iso_buffer;
	struct urb		*iso_urb[ISO_BUF_COUNT];
	int			iso_stream_count;
	struct mutex		iso_mutex;

	u8			packet[MAX_PVA_LENGTH + 4];
	enum ttusb_dec_packet_type	packet_type;
	int			packet_state;
	int			packet_length;
	int			packet_payload_length;
u16 next_packet_id; int pva_stream_count; int filter_stream_count; struct dvb_filter_pes2ts a_pes2ts; struct dvb_filter_pes2ts v_pes2ts; u8 v_pes[16 + MAX_PVA_LENGTH]; int v_pes_length; int v_pes_postbytes; struct list_head urb_frame_list; struct work_struct urb_bh_work; spinlock_t urb_frame_list_lock; struct dvb_demux_filter *audio_filter; struct dvb_demux_filter *video_filter; struct list_head filter_info_list; spinlock_t filter_info_list_lock; struct input_dev *rc_input_dev; char rc_phys[64]; int active; /* Loaded successfully */ }; struct urb_frame { u8 data[ISO_FRAME_SIZE]; int length; struct list_head urb_frame_list; }; struct filter_info { u8 stream_id; struct dvb_demux_filter *filter; struct list_head filter_info_list; }; static u16 rc_keys[] = { KEY_POWER, KEY_MUTE, KEY_1, KEY_2, KEY_3, KEY_4, KEY_5, KEY_6, KEY_7, KEY_8, KEY_9, KEY_0, KEY_CHANNELUP, KEY_VOLUMEDOWN, KEY_OK, KEY_VOLUMEUP, KEY_CHANNELDOWN, KEY_PREVIOUS, KEY_ESC, KEY_RED, KEY_GREEN, KEY_YELLOW, KEY_BLUE, KEY_OPTION, KEY_M, KEY_RADIO }; static void dvb_filter_pes2ts_init(struct dvb_filter_pes2ts *p2ts, unsigned short pid, dvb_filter_pes2ts_cb_t *cb, void *priv) { unsigned char *buf=p2ts->buf; buf[0]=0x47; buf[1]=(pid>>8); buf[2]=pid&0xff; p2ts->cc=0; p2ts->cb=cb; p2ts->priv=priv; } static int dvb_filter_pes2ts(struct dvb_filter_pes2ts *p2ts, unsigned char *pes, int len, int payload_start) { unsigned char *buf=p2ts->buf; int ret=0, rest; //len=6+((pes[4]<<8)|pes[5]); if (payload_start) buf[1]|=0x40; else buf[1]&=~0x40; while (len>=184) { buf[3]=0x10|((p2ts->cc++)&0x0f); memcpy(buf+4, pes, 184); if ((ret=p2ts->cb(p2ts->priv, buf))) return ret; len-=184; pes+=184; buf[1]&=~0x40; } if (!len) return 0; buf[3]=0x30|((p2ts->cc++)&0x0f); rest=183-len; if (rest) { buf[5]=0x00; if (rest-1) memset(buf+6, 0xff, rest-1); } buf[4]=rest; memcpy(buf+5+rest, pes, len); return p2ts->cb(p2ts->priv, buf); } static void ttusb_dec_set_model(struct ttusb_dec *dec, enum ttusb_dec_model model); static void ttusb_dec_handle_irq( struct urb *urb) { struct ttusb_dec *dec = urb->context; char *buffer = dec->irq_buffer; int retval; int index = buffer[4]; switch(urb->status) { case 0: /*success*/ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -ETIME: /* this urb is dead, cleanup */ dprintk("%s:urb shutting down with status: %d\n", __func__, urb->status); return; default: dprintk("%s:nonzero status received: %d\n", __func__,urb->status); goto exit; } if ((buffer[0] == 0x1) && (buffer[2] == 0x15)) { /* * IR - Event * * this is an fact a bit too simple implementation; * the box also reports a keyrepeat signal * (with buffer[3] == 0x40) in an interval of ~100ms. * But to handle this correctly we had to imlemenent some * kind of timer which signals a 'key up' event if no * keyrepeat signal is received for lets say 200ms. * this should/could be added later ... 
* for now lets report each signal as a key down and up */ if (index - 1 < ARRAY_SIZE(rc_keys)) { dprintk("%s:rc signal:%d\n", __func__, index); input_report_key(dec->rc_input_dev, rc_keys[index - 1], 1); input_sync(dec->rc_input_dev); input_report_key(dec->rc_input_dev, rc_keys[index - 1], 0); input_sync(dec->rc_input_dev); } } exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) printk("%s - usb_commit_urb failed with result: %d\n", __func__, retval); } static u16 crc16(u16 crc, const u8 *buf, size_t len) { u16 tmp; while (len--) { crc ^= *buf++; crc ^= (u8)crc >> 4; tmp = (u8)crc; crc ^= (tmp ^ (tmp << 1)) << 4; } return crc; } static int ttusb_dec_send_command(struct ttusb_dec *dec, const u8 command, int param_length, const u8 params[], int *result_length, u8 cmd_result[]) { int result, actual_len; u8 *b; dprintk("%s\n", __func__); b = kzalloc(COMMAND_PACKET_SIZE + 4, GFP_KERNEL); if (!b) return -ENOMEM; result = mutex_lock_interruptible(&dec->usb_mutex); if (result) { printk("%s: Failed to lock usb mutex.\n", __func__); goto err_free; } b[0] = 0xaa; b[1] = ++dec->trans_count; b[2] = command; b[3] = param_length; if (params) memcpy(&b[4], params, param_length); if (debug) { printk(KERN_DEBUG "%s: command: %*ph\n", __func__, param_length, b); } result = usb_bulk_msg(dec->udev, dec->command_pipe, b, COMMAND_PACKET_SIZE + 4, &actual_len, 1000); if (result) { printk("%s: command bulk message failed: error %d\n", __func__, result); goto err_mutex_unlock; } result = usb_bulk_msg(dec->udev, dec->result_pipe, b, COMMAND_PACKET_SIZE + 4, &actual_len, 1000); if (result) { printk("%s: result bulk message failed: error %d\n", __func__, result); goto err_mutex_unlock; } else { if (debug) { printk(KERN_DEBUG "%s: result: %*ph\n", __func__, actual_len, b); } if (result_length) *result_length = b[3]; if (cmd_result && b[3] > 0) memcpy(cmd_result, &b[4], b[3]); } err_mutex_unlock: mutex_unlock(&dec->usb_mutex); err_free: kfree(b); return result; } static int ttusb_dec_get_stb_state (struct ttusb_dec *dec, unsigned int *mode, unsigned int *model, unsigned int *version) { u8 c[COMMAND_PACKET_SIZE]; int c_length; int result; __be32 tmp; dprintk("%s\n", __func__); result = ttusb_dec_send_command(dec, 0x08, 0, NULL, &c_length, c); if (result) return result; if (c_length >= 0x0c) { if (mode != NULL) { memcpy(&tmp, c, 4); *mode = ntohl(tmp); } if (model != NULL) { memcpy(&tmp, &c[4], 4); *model = ntohl(tmp); } if (version != NULL) { memcpy(&tmp, &c[8], 4); *version = ntohl(tmp); } return 0; } else { return -ENOENT; } } static int ttusb_dec_audio_pes2ts_cb(void *priv, unsigned char *data) { struct ttusb_dec *dec = priv; dec->audio_filter->feed->cb.ts(data, 188, NULL, 0, &dec->audio_filter->feed->feed.ts, NULL); return 0; } static int ttusb_dec_video_pes2ts_cb(void *priv, unsigned char *data) { struct ttusb_dec *dec = priv; dec->video_filter->feed->cb.ts(data, 188, NULL, 0, &dec->video_filter->feed->feed.ts, NULL); return 0; } static void ttusb_dec_set_pids(struct ttusb_dec *dec) { u8 b[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; __be16 pcr = htons(dec->pid[DMX_PES_PCR]); __be16 audio = htons(dec->pid[DMX_PES_AUDIO]); __be16 video = htons(dec->pid[DMX_PES_VIDEO]); dprintk("%s\n", __func__); memcpy(&b[0], &pcr, 2); memcpy(&b[2], &audio, 2); memcpy(&b[4], &video, 2); ttusb_dec_send_command(dec, 0x50, sizeof(b), b, NULL, NULL); dvb_filter_pes2ts_init(&dec->a_pes2ts, dec->pid[DMX_PES_AUDIO], ttusb_dec_audio_pes2ts_cb, dec); dvb_filter_pes2ts_init(&dec->v_pes2ts, 
dec->pid[DMX_PES_VIDEO], ttusb_dec_video_pes2ts_cb, dec); dec->v_pes_length = 0; dec->v_pes_postbytes = 0; } static void ttusb_dec_process_pva(struct ttusb_dec *dec, u8 *pva, int length) { if (length < 8) { printk("%s: packet too short - discarding\n", __func__); return; } if (length > 8 + MAX_PVA_LENGTH) { printk("%s: packet too long - discarding\n", __func__); return; } switch (pva[2]) { case 0x01: { /* VideoStream */ int prebytes = pva[5] & 0x03; int postbytes = (pva[5] & 0x0c) >> 2; __be16 v_pes_payload_length; if (output_pva) { dec->video_filter->feed->cb.ts(pva, length, NULL, 0, &dec->video_filter->feed->feed.ts, NULL); return; } if (dec->v_pes_postbytes > 0 && dec->v_pes_postbytes == prebytes) { memcpy(&dec->v_pes[dec->v_pes_length], &pva[12], prebytes); dvb_filter_pes2ts(&dec->v_pes2ts, dec->v_pes, dec->v_pes_length + prebytes, 1); } if (pva[5] & 0x10) { dec->v_pes[7] = 0x80; dec->v_pes[8] = 0x05; dec->v_pes[9] = 0x21 | ((pva[8] & 0xc0) >> 5); dec->v_pes[10] = ((pva[8] & 0x3f) << 2) | ((pva[9] & 0xc0) >> 6); dec->v_pes[11] = 0x01 | ((pva[9] & 0x3f) << 2) | ((pva[10] & 0x80) >> 6); dec->v_pes[12] = ((pva[10] & 0x7f) << 1) | ((pva[11] & 0xc0) >> 7); dec->v_pes[13] = 0x01 | ((pva[11] & 0x7f) << 1); memcpy(&dec->v_pes[14], &pva[12 + prebytes], length - 12 - prebytes); dec->v_pes_length = 14 + length - 12 - prebytes; } else { dec->v_pes[7] = 0x00; dec->v_pes[8] = 0x00; memcpy(&dec->v_pes[9], &pva[8], length - 8); dec->v_pes_length = 9 + length - 8; } dec->v_pes_postbytes = postbytes; if (dec->v_pes[9 + dec->v_pes[8]] == 0x00 && dec->v_pes[10 + dec->v_pes[8]] == 0x00 && dec->v_pes[11 + dec->v_pes[8]] == 0x01) dec->v_pes[6] = 0x84; else dec->v_pes[6] = 0x80; v_pes_payload_length = htons(dec->v_pes_length - 6 + postbytes); memcpy(&dec->v_pes[4], &v_pes_payload_length, 2); if (postbytes == 0) dvb_filter_pes2ts(&dec->v_pes2ts, dec->v_pes, dec->v_pes_length, 1); break; } case 0x02: /* MainAudioStream */ if (output_pva) { dec->audio_filter->feed->cb.ts(pva, length, NULL, 0, &dec->audio_filter->feed->feed.ts, NULL); return; } dvb_filter_pes2ts(&dec->a_pes2ts, &pva[8], length - 8, pva[5] & 0x10); break; default: printk("%s: unknown PVA type: %02x.\n", __func__, pva[2]); break; } } static void ttusb_dec_process_filter(struct ttusb_dec *dec, u8 *packet, int length) { struct list_head *item; struct filter_info *finfo; struct dvb_demux_filter *filter = NULL; unsigned long flags; u8 sid; sid = packet[1]; spin_lock_irqsave(&dec->filter_info_list_lock, flags); for (item = dec->filter_info_list.next; item != &dec->filter_info_list; item = item->next) { finfo = list_entry(item, struct filter_info, filter_info_list); if (finfo->stream_id == sid) { filter = finfo->filter; break; } } spin_unlock_irqrestore(&dec->filter_info_list_lock, flags); if (filter) filter->feed->cb.sec(&packet[2], length - 2, NULL, 0, &filter->filter, NULL); } static void ttusb_dec_process_packet(struct ttusb_dec *dec) { int i; u16 csum = 0; u16 packet_id; if (dec->packet_length % 2) { printk("%s: odd sized packet - discarding\n", __func__); return; } for (i = 0; i < dec->packet_length; i += 2) csum ^= ((dec->packet[i] << 8) + dec->packet[i + 1]); if (csum) { printk("%s: checksum failed - discarding\n", __func__); return; } packet_id = dec->packet[dec->packet_length - 4] << 8; packet_id += dec->packet[dec->packet_length - 3]; if ((packet_id != dec->next_packet_id) && dec->next_packet_id) { printk("%s: warning: lost packets between %u and %u\n", __func__, dec->next_packet_id - 1, packet_id); } if (packet_id == 0xffff) 
dec->next_packet_id = 0x8000; else dec->next_packet_id = packet_id + 1; switch (dec->packet_type) { case TTUSB_DEC_PACKET_PVA: if (dec->pva_stream_count) ttusb_dec_process_pva(dec, dec->packet, dec->packet_payload_length); break; case TTUSB_DEC_PACKET_SECTION: if (dec->filter_stream_count) ttusb_dec_process_filter(dec, dec->packet, dec->packet_payload_length); break; case TTUSB_DEC_PACKET_EMPTY: break; } } static void swap_bytes(u8 *b, int length) { length -= length % 2; for (; length; b += 2, length -= 2) swap(*b, *(b + 1)); } static void ttusb_dec_process_urb_frame(struct ttusb_dec *dec, u8 *b, int length) { swap_bytes(b, length); while (length) { switch (dec->packet_state) { case 0: case 1: case 2: if (*b++ == 0xaa) dec->packet_state++; else dec->packet_state = 0; length--; break; case 3: if (*b == 0x00) { dec->packet_state++; dec->packet_length = 0; } else if (*b != 0xaa) { dec->packet_state = 0; } b++; length--; break; case 4: dec->packet[dec->packet_length++] = *b++; if (dec->packet_length == 2) { if (dec->packet[0] == 'A' && dec->packet[1] == 'V') { dec->packet_type = TTUSB_DEC_PACKET_PVA; dec->packet_state++; } else if (dec->packet[0] == 'S') { dec->packet_type = TTUSB_DEC_PACKET_SECTION; dec->packet_state++; } else if (dec->packet[0] == 0x00) { dec->packet_type = TTUSB_DEC_PACKET_EMPTY; dec->packet_payload_length = 2; dec->packet_state = 7; } else { printk("%s: unknown packet type: %02x%02x\n", __func__, dec->packet[0], dec->packet[1]); dec->packet_state = 0; } } length--; break; case 5: dec->packet[dec->packet_length++] = *b++; if (dec->packet_type == TTUSB_DEC_PACKET_PVA && dec->packet_length == 8) { dec->packet_state++; dec->packet_payload_length = 8 + (dec->packet[6] << 8) + dec->packet[7]; } else if (dec->packet_type == TTUSB_DEC_PACKET_SECTION && dec->packet_length == 5) { dec->packet_state++; dec->packet_payload_length = 5 + ((dec->packet[3] & 0x0f) << 8) + dec->packet[4]; } length--; break; case 6: { int remainder = dec->packet_payload_length - dec->packet_length; if (length >= remainder) { memcpy(dec->packet + dec->packet_length, b, remainder); dec->packet_length += remainder; b += remainder; length -= remainder; dec->packet_state++; } else { memcpy(&dec->packet[dec->packet_length], b, length); dec->packet_length += length; length = 0; } break; } case 7: { int tail = 4; dec->packet[dec->packet_length++] = *b++; if (dec->packet_type == TTUSB_DEC_PACKET_SECTION && dec->packet_payload_length % 2) tail++; if (dec->packet_length == dec->packet_payload_length + tail) { ttusb_dec_process_packet(dec); dec->packet_state = 0; } length--; break; } default: printk("%s: illegal packet state encountered.\n", __func__); dec->packet_state = 0; } } } static void ttusb_dec_process_urb_frame_list(struct work_struct *t) { struct ttusb_dec *dec = from_work(dec, t, urb_bh_work); struct list_head *item; struct urb_frame *frame; unsigned long flags; while (1) { spin_lock_irqsave(&dec->urb_frame_list_lock, flags); if ((item = dec->urb_frame_list.next) != &dec->urb_frame_list) { frame = list_entry(item, struct urb_frame, urb_frame_list); list_del(&frame->urb_frame_list); } else { spin_unlock_irqrestore(&dec->urb_frame_list_lock, flags); return; } spin_unlock_irqrestore(&dec->urb_frame_list_lock, flags); ttusb_dec_process_urb_frame(dec, frame->data, frame->length); kfree(frame); } } static void ttusb_dec_process_urb(struct urb *urb) { struct ttusb_dec *dec = urb->context; if (!urb->status) { int i; for (i = 0; i < FRAMES_PER_ISO_BUF; i++) { struct usb_iso_packet_descriptor *d; u8 *b; int length; 
struct urb_frame *frame; d = &urb->iso_frame_desc[i]; b = urb->transfer_buffer + d->offset; length = d->actual_length; if ((frame = kmalloc(sizeof(struct urb_frame), GFP_ATOMIC))) { unsigned long flags; memcpy(frame->data, b, length); frame->length = length; spin_lock_irqsave(&dec->urb_frame_list_lock, flags); list_add_tail(&frame->urb_frame_list, &dec->urb_frame_list); spin_unlock_irqrestore(&dec->urb_frame_list_lock, flags); queue_work(system_bh_wq, &dec->urb_bh_work); } } } else { /* -ENOENT is expected when unlinking urbs */ if (urb->status != -ENOENT) dprintk("%s: urb error: %d\n", __func__, urb->status); } if (dec->iso_stream_count) usb_submit_urb(urb, GFP_ATOMIC); } static void ttusb_dec_setup_urbs(struct ttusb_dec *dec) { int i, j, buffer_offset = 0; dprintk("%s\n", __func__); for (i = 0; i < ISO_BUF_COUNT; i++) { int frame_offset = 0; struct urb *urb = dec->iso_urb[i]; urb->dev = dec->udev; urb->context = dec; urb->complete = ttusb_dec_process_urb; urb->pipe = dec->in_pipe; urb->transfer_flags = URB_ISO_ASAP; urb->interval = 1; urb->number_of_packets = FRAMES_PER_ISO_BUF; urb->transfer_buffer_length = ISO_FRAME_SIZE * FRAMES_PER_ISO_BUF; urb->transfer_buffer = dec->iso_buffer + buffer_offset; buffer_offset += ISO_FRAME_SIZE * FRAMES_PER_ISO_BUF; for (j = 0; j < FRAMES_PER_ISO_BUF; j++) { urb->iso_frame_desc[j].offset = frame_offset; urb->iso_frame_desc[j].length = ISO_FRAME_SIZE; frame_offset += ISO_FRAME_SIZE; } } } static void ttusb_dec_stop_iso_xfer(struct ttusb_dec *dec) { int i; dprintk("%s\n", __func__); if (mutex_lock_interruptible(&dec->iso_mutex)) return; dec->iso_stream_count--; if (!dec->iso_stream_count) { for (i = 0; i < ISO_BUF_COUNT; i++) usb_kill_urb(dec->iso_urb[i]); } mutex_unlock(&dec->iso_mutex); } /* Setting the interface of the DEC tends to take down the USB communications * for a short period, so it's important not to call this function just before * trying to talk to it. 
*/ static int ttusb_dec_set_interface(struct ttusb_dec *dec, enum ttusb_dec_interface interface) { int result = 0; u8 b[] = { 0x05 }; if (interface != dec->interface) { switch (interface) { case TTUSB_DEC_INTERFACE_INITIAL: result = usb_set_interface(dec->udev, 0, 0); break; case TTUSB_DEC_INTERFACE_IN: result = ttusb_dec_send_command(dec, 0x80, sizeof(b), b, NULL, NULL); if (result) return result; result = usb_set_interface(dec->udev, 0, 8); break; case TTUSB_DEC_INTERFACE_OUT: result = usb_set_interface(dec->udev, 0, 1); break; } if (result) return result; dec->interface = interface; } return 0; } static int ttusb_dec_start_iso_xfer(struct ttusb_dec *dec) { int i, result; dprintk("%s\n", __func__); if (mutex_lock_interruptible(&dec->iso_mutex)) return -EAGAIN; if (!dec->iso_stream_count) { ttusb_dec_setup_urbs(dec); dec->packet_state = 0; dec->v_pes_postbytes = 0; dec->next_packet_id = 0; for (i = 0; i < ISO_BUF_COUNT; i++) { if ((result = usb_submit_urb(dec->iso_urb[i], GFP_ATOMIC))) { printk("%s: failed urb submission %d: error %d\n", __func__, i, result); while (i) { usb_kill_urb(dec->iso_urb[i - 1]); i--; } mutex_unlock(&dec->iso_mutex); return result; } } } dec->iso_stream_count++; mutex_unlock(&dec->iso_mutex); return 0; } static int ttusb_dec_start_ts_feed(struct dvb_demux_feed *dvbdmxfeed) { struct dvb_demux *dvbdmx = dvbdmxfeed->demux; struct ttusb_dec *dec = dvbdmx->priv; u8 b0[] = { 0x05 }; int result = 0; dprintk("%s\n", __func__); dprintk(" ts_type:"); if (dvbdmxfeed->ts_type & TS_DECODER) dprintk(" TS_DECODER"); if (dvbdmxfeed->ts_type & TS_PACKET) dprintk(" TS_PACKET"); if (dvbdmxfeed->ts_type & TS_PAYLOAD_ONLY) dprintk(" TS_PAYLOAD_ONLY"); dprintk("\n"); switch (dvbdmxfeed->pes_type) { case DMX_PES_VIDEO: dprintk(" pes_type: DMX_PES_VIDEO\n"); dec->pid[DMX_PES_PCR] = dvbdmxfeed->pid; dec->pid[DMX_PES_VIDEO] = dvbdmxfeed->pid; dec->video_filter = dvbdmxfeed->filter; ttusb_dec_set_pids(dec); break; case DMX_PES_AUDIO: dprintk(" pes_type: DMX_PES_AUDIO\n"); dec->pid[DMX_PES_AUDIO] = dvbdmxfeed->pid; dec->audio_filter = dvbdmxfeed->filter; ttusb_dec_set_pids(dec); break; case DMX_PES_TELETEXT: dec->pid[DMX_PES_TELETEXT] = dvbdmxfeed->pid; dprintk(" pes_type: DMX_PES_TELETEXT(not supported)\n"); return -ENOSYS; case DMX_PES_PCR: dprintk(" pes_type: DMX_PES_PCR\n"); dec->pid[DMX_PES_PCR] = dvbdmxfeed->pid; ttusb_dec_set_pids(dec); break; case DMX_PES_OTHER: dprintk(" pes_type: DMX_PES_OTHER(not supported)\n"); return -ENOSYS; default: dprintk(" pes_type: unknown (%d)\n", dvbdmxfeed->pes_type); return -EINVAL; } result = ttusb_dec_send_command(dec, 0x80, sizeof(b0), b0, NULL, NULL); if (result) return result; dec->pva_stream_count++; return ttusb_dec_start_iso_xfer(dec); } static int ttusb_dec_start_sec_feed(struct dvb_demux_feed *dvbdmxfeed) { struct ttusb_dec *dec = dvbdmxfeed->demux->priv; u8 b0[] = { 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; __be16 pid; u8 c[COMMAND_PACKET_SIZE]; int c_length; int result; struct filter_info *finfo; unsigned long flags; u8 x = 1; dprintk("%s\n", __func__); pid = htons(dvbdmxfeed->pid); memcpy(&b0[0], &pid, 2); memcpy(&b0[4], &x, 1); memcpy(&b0[5], &dvbdmxfeed->filter->filter.filter_value[0], 1); result = ttusb_dec_send_command(dec, 0x60, sizeof(b0), b0, &c_length, c); if (!result) { if (c_length == 2) { if (!(finfo = kmalloc(sizeof(struct filter_info), GFP_ATOMIC))) return -ENOMEM; finfo->stream_id = c[1]; 
finfo->filter = dvbdmxfeed->filter; spin_lock_irqsave(&dec->filter_info_list_lock, flags); list_add_tail(&finfo->filter_info_list, &dec->filter_info_list); spin_unlock_irqrestore(&dec->filter_info_list_lock, flags); dvbdmxfeed->priv = finfo; dec->filter_stream_count++; return ttusb_dec_start_iso_xfer(dec); } return -EAGAIN; } else return result; } static int ttusb_dec_start_feed(struct dvb_demux_feed *dvbdmxfeed) { struct dvb_demux *dvbdmx = dvbdmxfeed->demux; dprintk("%s\n", __func__); if (!dvbdmx->dmx.frontend) return -EINVAL; dprintk(" pid: 0x%04X\n", dvbdmxfeed->pid); switch (dvbdmxfeed->type) { case DMX_TYPE_TS: return ttusb_dec_start_ts_feed(dvbdmxfeed); case DMX_TYPE_SEC: return ttusb_dec_start_sec_feed(dvbdmxfeed); default: dprintk(" type: unknown (%d)\n", dvbdmxfeed->type); return -EINVAL; } } static int ttusb_dec_stop_ts_feed(struct dvb_demux_feed *dvbdmxfeed) { struct ttusb_dec *dec = dvbdmxfeed->demux->priv; u8 b0[] = { 0x00 }; ttusb_dec_send_command(dec, 0x81, sizeof(b0), b0, NULL, NULL); dec->pva_stream_count--; ttusb_dec_stop_iso_xfer(dec); return 0; } static int ttusb_dec_stop_sec_feed(struct dvb_demux_feed *dvbdmxfeed) { struct ttusb_dec *dec = dvbdmxfeed->demux->priv; u8 b0[] = { 0x00, 0x00 }; struct filter_info *finfo = dvbdmxfeed->priv; unsigned long flags; b0[1] = finfo->stream_id; spin_lock_irqsave(&dec->filter_info_list_lock, flags); list_del(&finfo->filter_info_list); spin_unlock_irqrestore(&dec->filter_info_list_lock, flags); kfree(finfo); ttusb_dec_send_command(dec, 0x62, sizeof(b0), b0, NULL, NULL); dec->filter_stream_count--; ttusb_dec_stop_iso_xfer(dec); return 0; } static int ttusb_dec_stop_feed(struct dvb_demux_feed *dvbdmxfeed) { dprintk("%s\n", __func__); switch (dvbdmxfeed->type) { case DMX_TYPE_TS: return ttusb_dec_stop_ts_feed(dvbdmxfeed); case DMX_TYPE_SEC: return ttusb_dec_stop_sec_feed(dvbdmxfeed); } return 0; } static void ttusb_dec_free_iso_urbs(struct ttusb_dec *dec) { int i; dprintk("%s\n", __func__); for (i = 0; i < ISO_BUF_COUNT; i++) usb_free_urb(dec->iso_urb[i]); kfree(dec->iso_buffer); } static int ttusb_dec_alloc_iso_urbs(struct ttusb_dec *dec) { int i; dprintk("%s\n", __func__); dec->iso_buffer = kcalloc(FRAMES_PER_ISO_BUF * ISO_BUF_COUNT, ISO_FRAME_SIZE, GFP_KERNEL); if (!dec->iso_buffer) return -ENOMEM; for (i = 0; i < ISO_BUF_COUNT; i++) { struct urb *urb; if (!(urb = usb_alloc_urb(FRAMES_PER_ISO_BUF, GFP_ATOMIC))) { ttusb_dec_free_iso_urbs(dec); return -ENOMEM; } dec->iso_urb[i] = urb; } ttusb_dec_setup_urbs(dec); return 0; } static void ttusb_dec_init_bh_work(struct ttusb_dec *dec) { spin_lock_init(&dec->urb_frame_list_lock); INIT_LIST_HEAD(&dec->urb_frame_list); INIT_WORK(&dec->urb_bh_work, ttusb_dec_process_urb_frame_list); } static int ttusb_init_rc( struct ttusb_dec *dec) { struct input_dev *input_dev; u8 b[] = { 0x00, 0x01 }; int i; int err; usb_make_path(dec->udev, dec->rc_phys, sizeof(dec->rc_phys)); strlcat(dec->rc_phys, "/input0", sizeof(dec->rc_phys)); input_dev = input_allocate_device(); if (!input_dev) return -ENOMEM; input_dev->name = "ttusb_dec remote control"; input_dev->phys = dec->rc_phys; input_dev->evbit[0] = BIT_MASK(EV_KEY); input_dev->keycodesize = sizeof(u16); input_dev->keycodemax = 0x1a; input_dev->keycode = rc_keys; for (i = 0; i < ARRAY_SIZE(rc_keys); i++) set_bit(rc_keys[i], input_dev->keybit); err = input_register_device(input_dev); if (err) { input_free_device(input_dev); return err; } dec->rc_input_dev = input_dev; if (usb_submit_urb(dec->irq_urb, GFP_KERNEL)) printk("%s: usb_submit_urb 
failed\n",__func__); /* enable irq pipe */ ttusb_dec_send_command(dec,0xb0,sizeof(b),b,NULL,NULL); return 0; } static void ttusb_dec_init_v_pes(struct ttusb_dec *dec) { dprintk("%s\n", __func__); dec->v_pes[0] = 0x00; dec->v_pes[1] = 0x00; dec->v_pes[2] = 0x01; dec->v_pes[3] = 0xe0; } static int ttusb_dec_init_usb(struct ttusb_dec *dec) { int result; dprintk("%s\n", __func__); mutex_init(&dec->usb_mutex); mutex_init(&dec->iso_mutex); dec->command_pipe = usb_sndbulkpipe(dec->udev, COMMAND_PIPE); dec->result_pipe = usb_rcvbulkpipe(dec->udev, RESULT_PIPE); dec->in_pipe = usb_rcvisocpipe(dec->udev, IN_PIPE); dec->out_pipe = usb_sndisocpipe(dec->udev, OUT_PIPE); dec->irq_pipe = usb_rcvintpipe(dec->udev, IRQ_PIPE); if(enable_rc) { dec->irq_urb = usb_alloc_urb(0, GFP_KERNEL); if(!dec->irq_urb) { return -ENOMEM; } dec->irq_buffer = usb_alloc_coherent(dec->udev,IRQ_PACKET_SIZE, GFP_KERNEL, &dec->irq_dma_handle); if(!dec->irq_buffer) { usb_free_urb(dec->irq_urb); return -ENOMEM; } usb_fill_int_urb(dec->irq_urb, dec->udev,dec->irq_pipe, dec->irq_buffer, IRQ_PACKET_SIZE, ttusb_dec_handle_irq, dec, 1); dec->irq_urb->transfer_dma = dec->irq_dma_handle; dec->irq_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; } result = ttusb_dec_alloc_iso_urbs(dec); if (result) { usb_free_urb(dec->irq_urb); usb_free_coherent(dec->udev, IRQ_PACKET_SIZE, dec->irq_buffer, dec->irq_dma_handle); } return result; } static int ttusb_dec_boot_dsp(struct ttusb_dec *dec) { int i, j, actual_len, result, size, trans_count; u8 b0[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x61, 0x00 }; u8 b1[] = { 0x61 }; u8 *b; char idstring[21]; const u8 *firmware = NULL; size_t firmware_size = 0; u16 firmware_csum = 0; __be16 firmware_csum_ns; __be32 firmware_size_nl; u32 crc32_csum, crc32_check; __be32 tmp; const struct firmware *fw_entry = NULL; dprintk("%s\n", __func__); result = request_firmware(&fw_entry, dec->firmware_name, &dec->udev->dev); if (result) { printk(KERN_ERR "%s: Firmware (%s) unavailable.\n", __func__, dec->firmware_name); return result; } firmware = fw_entry->data; firmware_size = fw_entry->size; if (firmware_size < 60) { printk("%s: firmware size too small for DSP code (%zu < 60).\n", __func__, firmware_size); release_firmware(fw_entry); return -ENOENT; } /* a 32 bit checksum over the first 56 bytes of the DSP Code is stored at offset 56 of file, so use it to check if the firmware file is valid. 
*/ crc32_csum = crc32(~0L, firmware, 56) ^ ~0L; memcpy(&tmp, &firmware[56], 4); crc32_check = ntohl(tmp); if (crc32_csum != crc32_check) { printk("%s: crc32 check of DSP code failed (calculated 0x%08x != 0x%08x in file), file invalid.\n", __func__, crc32_csum, crc32_check); release_firmware(fw_entry); return -ENOENT; } memcpy(idstring, &firmware[36], 20); idstring[20] = '\0'; printk(KERN_INFO "ttusb_dec: found DSP code \"%s\".\n", idstring); firmware_size_nl = htonl(firmware_size); memcpy(b0, &firmware_size_nl, 4); firmware_csum = crc16(~0, firmware, firmware_size) ^ ~0; firmware_csum_ns = htons(firmware_csum); memcpy(&b0[6], &firmware_csum_ns, 2); result = ttusb_dec_send_command(dec, 0x41, sizeof(b0), b0, NULL, NULL); if (result) { release_firmware(fw_entry); return result; } trans_count = 0; j = 0; b = kmalloc(ARM_PACKET_SIZE, GFP_KERNEL); if (b == NULL) { release_firmware(fw_entry); return -ENOMEM; } for (i = 0; i < firmware_size; i += COMMAND_PACKET_SIZE) { size = firmware_size - i; if (size > COMMAND_PACKET_SIZE) size = COMMAND_PACKET_SIZE; b[j + 0] = 0xaa; b[j + 1] = trans_count++; b[j + 2] = 0xf0; b[j + 3] = size; memcpy(&b[j + 4], &firmware[i], size); j += COMMAND_PACKET_SIZE + 4; if (j >= ARM_PACKET_SIZE) { result = usb_bulk_msg(dec->udev, dec->command_pipe, b, ARM_PACKET_SIZE, &actual_len, 100); j = 0; } else if (size < COMMAND_PACKET_SIZE) { result = usb_bulk_msg(dec->udev, dec->command_pipe, b, j - COMMAND_PACKET_SIZE + size, &actual_len, 100); } } result = ttusb_dec_send_command(dec, 0x43, sizeof(b1), b1, NULL, NULL); release_firmware(fw_entry); kfree(b); return result; } static int ttusb_dec_init_stb(struct ttusb_dec *dec) { int result; unsigned int mode = 0, model = 0, version = 0; dprintk("%s\n", __func__); result = ttusb_dec_get_stb_state(dec, &mode, &model, &version); if (result) return result; if (!mode) { if (version == 0xABCDEFAB) printk(KERN_INFO "ttusb_dec: no version info in Firmware\n"); else printk(KERN_INFO "ttusb_dec: Firmware %x.%02x%c%c\n", version >> 24, (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff); result = ttusb_dec_boot_dsp(dec); if (result) return result; } else { /* We can't trust the USB IDs that some firmwares give the box */ switch (model) { case 0x00070001: case 0x00070008: case 0x0007000c: ttusb_dec_set_model(dec, TTUSB_DEC3000S); break; case 0x00070009: case 0x00070013: ttusb_dec_set_model(dec, TTUSB_DEC2000T); break; case 0x00070011: ttusb_dec_set_model(dec, TTUSB_DEC2540T); break; default: printk(KERN_ERR "%s: unknown model returned by firmware (%08x) - please report\n", __func__, model); return -ENOENT; } if (version >= 0x01770000) dec->can_playback = 1; } return 0; } static int ttusb_dec_init_dvb(struct ttusb_dec *dec) { int result; dprintk("%s\n", __func__); if ((result = dvb_register_adapter(&dec->adapter, dec->model_name, THIS_MODULE, &dec->udev->dev, adapter_nr)) < 0) { printk("%s: dvb_register_adapter failed: error %d\n", __func__, result); return result; } dec->demux.dmx.capabilities = DMX_TS_FILTERING | DMX_SECTION_FILTERING; dec->demux.priv = (void *)dec; dec->demux.filternum = 31; dec->demux.feednum = 31; dec->demux.start_feed = ttusb_dec_start_feed; dec->demux.stop_feed = ttusb_dec_stop_feed; dec->demux.write_to_decoder = NULL; if ((result = dvb_dmx_init(&dec->demux)) < 0) { printk("%s: dvb_dmx_init failed: error %d\n", __func__, result); dvb_unregister_adapter(&dec->adapter); return result; } dec->dmxdev.filternum = 32; dec->dmxdev.demux = &dec->demux.dmx; dec->dmxdev.capabilities = 0; if ((result = 
dvb_dmxdev_init(&dec->dmxdev, &dec->adapter)) < 0) { printk("%s: dvb_dmxdev_init failed: error %d\n", __func__, result); dvb_dmx_release(&dec->demux); dvb_unregister_adapter(&dec->adapter); return result; } dec->frontend.source = DMX_FRONTEND_0; if ((result = dec->demux.dmx.add_frontend(&dec->demux.dmx, &dec->frontend)) < 0) { printk("%s: dvb_dmx_init failed: error %d\n", __func__, result); dvb_dmxdev_release(&dec->dmxdev); dvb_dmx_release(&dec->demux); dvb_unregister_adapter(&dec->adapter); return result; } if ((result = dec->demux.dmx.connect_frontend(&dec->demux.dmx, &dec->frontend)) < 0) { printk("%s: dvb_dmx_init failed: error %d\n", __func__, result); dec->demux.dmx.remove_frontend(&dec->demux.dmx, &dec->frontend); dvb_dmxdev_release(&dec->dmxdev); dvb_dmx_release(&dec->demux); dvb_unregister_adapter(&dec->adapter); return result; } dvb_net_init(&dec->adapter, &dec->dvb_net, &dec->demux.dmx); return 0; } static void ttusb_dec_exit_dvb(struct ttusb_dec *dec) { dprintk("%s\n", __func__); dvb_net_release(&dec->dvb_net); dec->demux.dmx.close(&dec->demux.dmx); dec->demux.dmx.remove_frontend(&dec->demux.dmx, &dec->frontend); dvb_dmxdev_release(&dec->dmxdev); dvb_dmx_release(&dec->demux); if (dec->fe) { dvb_unregister_frontend(dec->fe); dvb_frontend_detach(dec->fe); } dvb_unregister_adapter(&dec->adapter); } static void ttusb_dec_exit_rc(struct ttusb_dec *dec) { dprintk("%s\n", __func__); if (dec->rc_input_dev) { input_unregister_device(dec->rc_input_dev); dec->rc_input_dev = NULL; } } static void ttusb_dec_exit_usb(struct ttusb_dec *dec) { int i; dprintk("%s\n", __func__); if (enable_rc) { /* we have to check whether the irq URB is already submitted. * As the irq is submitted after the interface is changed, * this is the best method i figured out. * Any others?*/ if (dec->interface == TTUSB_DEC_INTERFACE_IN) usb_kill_urb(dec->irq_urb); usb_free_urb(dec->irq_urb); usb_free_coherent(dec->udev, IRQ_PACKET_SIZE, dec->irq_buffer, dec->irq_dma_handle); } dec->iso_stream_count = 0; for (i = 0; i < ISO_BUF_COUNT; i++) usb_kill_urb(dec->iso_urb[i]); ttusb_dec_free_iso_urbs(dec); } static void ttusb_dec_exit_bh_work(struct ttusb_dec *dec) { struct list_head *item; struct urb_frame *frame; cancel_work_sync(&dec->urb_bh_work); while ((item = dec->urb_frame_list.next) != &dec->urb_frame_list) { frame = list_entry(item, struct urb_frame, urb_frame_list); list_del(&frame->urb_frame_list); kfree(frame); } } static void ttusb_dec_init_filters(struct ttusb_dec *dec) { INIT_LIST_HEAD(&dec->filter_info_list); spin_lock_init(&dec->filter_info_list_lock); } static void ttusb_dec_exit_filters(struct ttusb_dec *dec) { struct list_head *item; struct filter_info *finfo; while ((item = dec->filter_info_list.next) != &dec->filter_info_list) { finfo = list_entry(item, struct filter_info, filter_info_list); list_del(&finfo->filter_info_list); kfree(finfo); } } static int fe_send_command(struct dvb_frontend* fe, const u8 command, int param_length, const u8 params[], int *result_length, u8 cmd_result[]) { struct ttusb_dec* dec = fe->dvb->priv; return ttusb_dec_send_command(dec, command, param_length, params, result_length, cmd_result); } static const struct ttusbdecfe_config fe_config = { .send_command = fe_send_command }; static int ttusb_dec_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev; struct ttusb_dec *dec; int result; dprintk("%s\n", __func__); udev = interface_to_usbdev(intf); if (!(dec = kzalloc(sizeof(struct ttusb_dec), GFP_KERNEL))) { printk("%s: couldn't allocate 
memory.\n", __func__); return -ENOMEM; } usb_set_intfdata(intf, (void *)dec); switch (id->idProduct) { case 0x1006: ttusb_dec_set_model(dec, TTUSB_DEC3000S); break; case 0x1008: ttusb_dec_set_model(dec, TTUSB_DEC2000T); break; case 0x1009: ttusb_dec_set_model(dec, TTUSB_DEC2540T); break; } dec->udev = udev; result = ttusb_dec_init_usb(dec); if (result) goto err_usb; result = ttusb_dec_init_stb(dec); if (result) goto err_stb; result = ttusb_dec_init_dvb(dec); if (result) goto err_stb; dec->adapter.priv = dec; switch (id->idProduct) { case 0x1006: dec->fe = ttusbdecfe_dvbs_attach(&fe_config); break; case 0x1008: case 0x1009: dec->fe = ttusbdecfe_dvbt_attach(&fe_config); break; } if (dec->fe == NULL) { printk("dvb-ttusb-dec: A frontend driver was not found for device [%04x:%04x]\n", le16_to_cpu(dec->udev->descriptor.idVendor), le16_to_cpu(dec->udev->descriptor.idProduct)); } else { if (dvb_register_frontend(&dec->adapter, dec->fe)) { printk("budget-ci: Frontend registration failed!\n"); if (dec->fe->ops.release) dec->fe->ops.release(dec->fe); dec->fe = NULL; } } ttusb_dec_init_v_pes(dec); ttusb_dec_init_filters(dec); ttusb_dec_init_bh_work(dec); dec->active = 1; ttusb_dec_set_interface(dec, TTUSB_DEC_INTERFACE_IN); if (enable_rc) ttusb_init_rc(dec); return 0; err_stb: ttusb_dec_exit_usb(dec); err_usb: kfree(dec); return result; } static void ttusb_dec_disconnect(struct usb_interface *intf) { struct ttusb_dec *dec = usb_get_intfdata(intf); usb_set_intfdata(intf, NULL); dprintk("%s\n", __func__); if (dec->active) { ttusb_dec_exit_bh_work(dec); ttusb_dec_exit_filters(dec); if(enable_rc) ttusb_dec_exit_rc(dec); ttusb_dec_exit_usb(dec); ttusb_dec_exit_dvb(dec); } kfree(dec); } static void ttusb_dec_set_model(struct ttusb_dec *dec, enum ttusb_dec_model model) { dec->model = model; switch (model) { case TTUSB_DEC2000T: dec->model_name = "DEC2000-t"; dec->firmware_name = "dvb-ttusb-dec-2000t.fw"; break; case TTUSB_DEC2540T: dec->model_name = "DEC2540-t"; dec->firmware_name = "dvb-ttusb-dec-2540t.fw"; break; case TTUSB_DEC3000S: dec->model_name = "DEC3000-s"; dec->firmware_name = "dvb-ttusb-dec-3000s.fw"; break; } } static const struct usb_device_id ttusb_dec_table[] = { {USB_DEVICE(0x0b48, 0x1006)}, /* DEC3000-s */ /*{USB_DEVICE(0x0b48, 0x1007)}, Unconfirmed */ {USB_DEVICE(0x0b48, 0x1008)}, /* DEC2000-t */ {USB_DEVICE(0x0b48, 0x1009)}, /* DEC2540-t */ {} }; static struct usb_driver ttusb_dec_driver = { .name = "ttusb-dec", .probe = ttusb_dec_probe, .disconnect = ttusb_dec_disconnect, .id_table = ttusb_dec_table, }; module_usb_driver(ttusb_dec_driver); MODULE_AUTHOR("Alex Woods <linux-dvb@giblets.org>"); MODULE_DESCRIPTION(DRIVER_NAME); MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(usb, ttusb_dec_table);
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *
 * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk)
 * Copyright (C) 2002 Ralf Baechle DO1GRB (ralf@gnu.org)
 */
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <net/ax25.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <net/rose.h>

static void rose_heartbeat_expiry(struct timer_list *t);
static void rose_timer_expiry(struct timer_list *);
static void rose_idletimer_expiry(struct timer_list *);

void rose_start_heartbeat(struct sock *sk)
{
	sk_stop_timer(sk, &sk->sk_timer);

	sk->sk_timer.function = rose_heartbeat_expiry;
	sk->sk_timer.expires  = jiffies + 5 * HZ;

	sk_reset_timer(sk, &sk->sk_timer, sk->sk_timer.expires);
}

void rose_start_t1timer(struct sock *sk)
{
	struct rose_sock *rose = rose_sk(sk);

	sk_stop_timer(sk, &rose->timer);

	rose->timer.function = rose_timer_expiry;
	rose->timer.expires  = jiffies + rose->t1;

	sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}

void rose_start_t2timer(struct sock *sk)
{
	struct rose_sock *rose = rose_sk(sk);

	sk_stop_timer(sk, &rose->timer);

	rose->timer.function = rose_timer_expiry;
	rose->timer.expires  = jiffies + rose->t2;

	sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}

void rose_start_t3timer(struct sock *sk)
{
	struct rose_sock *rose = rose_sk(sk);

	sk_stop_timer(sk, &rose->timer);

	rose->timer.function = rose_timer_expiry;
	rose->timer.expires  = jiffies + rose->t3;

	sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}

void rose_start_hbtimer(struct sock *sk)
{
	struct rose_sock *rose = rose_sk(sk);

	sk_stop_timer(sk, &rose->timer);

	rose->timer.function = rose_timer_expiry;
	rose->timer.expires  = jiffies + rose->hb;

	sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}

void rose_start_idletimer(struct sock *sk)
{
	struct rose_sock *rose = rose_sk(sk);

	sk_stop_timer(sk, &rose->idletimer);

	if (rose->idle > 0) {
		rose->idletimer.function = rose_idletimer_expiry;
		rose->idletimer.expires  = jiffies + rose->idle;

		sk_reset_timer(sk, &rose->idletimer, rose->idletimer.expires);
	}
}

void rose_stop_heartbeat(struct sock *sk)
{
	sk_stop_timer(sk, &sk->sk_timer);
}

void rose_stop_timer(struct sock *sk)
{
	sk_stop_timer(sk, &rose_sk(sk)->timer);
}

void rose_stop_idletimer(struct sock *sk)
{
	sk_stop_timer(sk, &rose_sk(sk)->idletimer);
}

static void rose_heartbeat_expiry(struct timer_list *t)
{
	struct sock *sk = timer_container_of(sk, t, sk_timer);
	struct rose_sock *rose = rose_sk(sk);

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ/20);
		goto out;
	}

	switch (rose->state) {
	case ROSE_STATE_0:
		/* Magic here: If we listen() and a new link dies before it
		   is accepted() it isn't 'dead' so doesn't get removed. */
		if (sock_flag(sk, SOCK_DESTROY) ||
		    (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
			bh_unlock_sock(sk);
			rose_destroy_socket(sk);
			sock_put(sk);
			return;
		}
		break;

	case ROSE_STATE_3:
		/*
		 * Check for the state of the receive buffer.
		 */
		if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf / 2) &&
		    (rose->condition & ROSE_COND_OWN_RX_BUSY)) {
			rose->condition &= ~ROSE_COND_OWN_RX_BUSY;
			rose->condition &= ~ROSE_COND_ACK_PENDING;
			rose->vl         = rose->vr;
			rose_write_internal(sk, ROSE_RR);
			rose_stop_timer(sk);	/* HB */
			break;
		}
		break;
	}

	rose_start_heartbeat(sk);
out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

static void rose_timer_expiry(struct timer_list *t)
{
	struct rose_sock *rose = timer_container_of(rose, t, timer);
	struct sock *sk = &rose->sock;

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		sk_reset_timer(sk, &rose->timer, jiffies + HZ/20);
		goto out;
	}

	switch (rose->state) {
	case ROSE_STATE_1:	/* T1 */
	case ROSE_STATE_4:	/* T2 */
		rose_write_internal(sk, ROSE_CLEAR_REQUEST);
		rose->state = ROSE_STATE_2;
		rose_start_t3timer(sk);
		break;

	case ROSE_STATE_2:	/* T3 */
		rose_neigh_put(rose->neighbour);
		rose_disconnect(sk, ETIMEDOUT, -1, -1);
		break;

	case ROSE_STATE_3:	/* HB */
		if (rose->condition & ROSE_COND_ACK_PENDING) {
			rose->condition &= ~ROSE_COND_ACK_PENDING;
			rose_enquiry_response(sk);
		}
		break;
	}
out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

static void rose_idletimer_expiry(struct timer_list *t)
{
	struct rose_sock *rose = timer_container_of(rose, t, idletimer);
	struct sock *sk = &rose->sock;

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		sk_reset_timer(sk, &rose->idletimer, jiffies + HZ/20);
		goto out;
	}

	rose_clear_queues(sk);

	rose_write_internal(sk, ROSE_CLEAR_REQUEST);
	rose_sk(sk)->state = ROSE_STATE_2;
	rose_start_t3timer(sk);

	sk->sk_state     = TCP_CLOSE;
	sk->sk_err       = 0;
	sk->sk_shutdown |= SEND_SHUTDOWN;

	if (!sock_flag(sk, SOCK_DEAD)) {
		sk->sk_state_change(sk);
		sock_set_flag(sk, SOCK_DEAD);
	}
out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
// SPDX-License-Identifier: GPL-2.0
/*
 * IPv6 Address Label subsystem
 * for the IPv6 "Default" Source Address Selection
 *
 * Copyright (C)2007 USAGI/WIDE Project
 */
/*
 * Author:
 *	YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
 */

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/in6.h>
#include <linux/slab.h>
#include <net/addrconf.h>
#include <linux/if_addrlabel.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

/*
 * Policy Table
 */
struct ip6addrlbl_entry {
	struct in6_addr prefix;
	int prefixlen;
	int ifindex;
	int addrtype;
	u32 label;
	struct hlist_node list;
	struct rcu_head rcu;
};

/*
 * Default policy table (RFC6724 + extensions)
 *
 * prefix		addr_type	label
 * -------------------------------------------------------------------------
 * ::1/128		LOOPBACK	0
 * ::/0			N/A		1
 * 2002::/16		N/A		2
 * ::/96		COMPATv4	3
 * ::ffff:0:0/96
V4MAPPED 4 * fc00::/7 N/A 5 ULA (RFC 4193) * 2001::/32 N/A 6 Teredo (RFC 4380) * 2001:10::/28 N/A 7 ORCHID (RFC 4843) * fec0::/10 N/A 11 Site-local * (deprecated by RFC3879) * 3ffe::/16 N/A 12 6bone * * Note: 0xffffffff is used if we do not have any policies. * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724. */ #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL static const __net_initconst struct ip6addrlbl_init_table { const struct in6_addr *prefix; int prefixlen; u32 label; } ip6addrlbl_init_table[] = { { /* ::/0 */ .prefix = &in6addr_any, .label = 1, }, { /* fc00::/7 */ .prefix = &(struct in6_addr){ { { 0xfc } } } , .prefixlen = 7, .label = 5, }, { /* fec0::/10 */ .prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } }, .prefixlen = 10, .label = 11, }, { /* 2002::/16 */ .prefix = &(struct in6_addr){ { { 0x20, 0x02 } } }, .prefixlen = 16, .label = 2, }, { /* 3ffe::/16 */ .prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } }, .prefixlen = 16, .label = 12, }, { /* 2001::/32 */ .prefix = &(struct in6_addr){ { { 0x20, 0x01 } } }, .prefixlen = 32, .label = 6, }, { /* 2001:10::/28 */ .prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } }, .prefixlen = 28, .label = 7, }, { /* ::ffff:0:0 */ .prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } }, .prefixlen = 96, .label = 4, }, { /* ::/96 */ .prefix = &in6addr_any, .prefixlen = 96, .label = 3, }, { /* ::1/128 */ .prefix = &in6addr_loopback, .prefixlen = 128, .label = 0, } }; /* Find label */ static bool __ip6addrlbl_match(const struct ip6addrlbl_entry *p, const struct in6_addr *addr, int addrtype, int ifindex) { if (p->ifindex && p->ifindex != ifindex) return false; if (p->addrtype && p->addrtype != addrtype) return false; if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen)) return false; return true; } static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, const struct in6_addr *addr, int type, int ifindex) { struct ip6addrlbl_entry *p; hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) { if (__ip6addrlbl_match(p, addr, type, ifindex)) return p; } return NULL; } u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr, int type, int ifindex) { u32 label; struct ip6addrlbl_entry *p; type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK; rcu_read_lock(); p = __ipv6_addr_label(net, addr, type, ifindex); label = p ? 
p->label : IPV6_ADDR_LABEL_DEFAULT; rcu_read_unlock(); net_dbg_ratelimited("%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", __func__, addr, type, ifindex, label); return label; } /* allocate one entry */ static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, int prefixlen, int ifindex, u32 label) { struct ip6addrlbl_entry *newp; int addrtype; net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", __func__, prefix, prefixlen, ifindex, (unsigned int)label); addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); switch (addrtype) { case IPV6_ADDR_MAPPED: if (prefixlen > 96) return ERR_PTR(-EINVAL); if (prefixlen < 96) addrtype = 0; break; case IPV6_ADDR_COMPATv4: if (prefixlen != 96) addrtype = 0; break; case IPV6_ADDR_LOOPBACK: if (prefixlen != 128) addrtype = 0; break; } newp = kmalloc(sizeof(*newp), GFP_KERNEL); if (!newp) return ERR_PTR(-ENOMEM); ipv6_addr_prefix(&newp->prefix, prefix, prefixlen); newp->prefixlen = prefixlen; newp->ifindex = ifindex; newp->addrtype = addrtype; newp->label = label; INIT_HLIST_NODE(&newp->list); return newp; } /* add a label */ static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp, int replace) { struct ip6addrlbl_entry *last = NULL, *p = NULL; struct hlist_node *n; int ret = 0; net_dbg_ratelimited("%s(newp=%p, replace=%d)\n", __func__, newp, replace); hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { if (p->prefixlen == newp->prefixlen && p->ifindex == newp->ifindex && ipv6_addr_equal(&p->prefix, &newp->prefix)) { if (!replace) { ret = -EEXIST; goto out; } hlist_replace_rcu(&p->list, &newp->list); kfree_rcu(p, rcu); goto out; } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || (p->prefixlen < newp->prefixlen)) { hlist_add_before_rcu(&newp->list, &p->list); goto out; } last = p; } if (last) hlist_add_behind_rcu(&newp->list, &last->list); else hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head); out: if (!ret) WRITE_ONCE(net->ipv6.ip6addrlbl_table.seq, net->ipv6.ip6addrlbl_table.seq + 1); return ret; } /* add a label */ static int ip6addrlbl_add(struct net *net, const struct in6_addr *prefix, int prefixlen, int ifindex, u32 label, int replace) { struct ip6addrlbl_entry *newp; int ret = 0; net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", __func__, prefix, prefixlen, ifindex, (unsigned int)label, replace); newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label); if (IS_ERR(newp)) return PTR_ERR(newp); spin_lock(&net->ipv6.ip6addrlbl_table.lock); ret = __ip6addrlbl_add(net, newp, replace); spin_unlock(&net->ipv6.ip6addrlbl_table.lock); if (ret) kfree(newp); return ret; } /* remove a label */ static int __ip6addrlbl_del(struct net *net, const struct in6_addr *prefix, int prefixlen, int ifindex) { struct ip6addrlbl_entry *p = NULL; struct hlist_node *n; int ret = -ESRCH; net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, prefixlen, ifindex); hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { if (p->prefixlen == prefixlen && p->ifindex == ifindex && ipv6_addr_equal(&p->prefix, prefix)) { hlist_del_rcu(&p->list); kfree_rcu(p, rcu); ret = 0; break; } } return ret; } static int ip6addrlbl_del(struct net *net, const struct in6_addr *prefix, int prefixlen, int ifindex) { struct in6_addr prefix_buf; int ret; net_dbg_ratelimited("%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, prefixlen, ifindex); 
ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); spin_lock(&net->ipv6.ip6addrlbl_table.lock); ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); spin_unlock(&net->ipv6.ip6addrlbl_table.lock); return ret; } /* add default label */ static int __net_init ip6addrlbl_net_init(struct net *net) { struct ip6addrlbl_entry *p = NULL; struct hlist_node *n; int err; int i; spin_lock_init(&net->ipv6.ip6addrlbl_table.lock); INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head); for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { err = ip6addrlbl_add(net, ip6addrlbl_init_table[i].prefix, ip6addrlbl_init_table[i].prefixlen, 0, ip6addrlbl_init_table[i].label, 0); if (err) goto err_ip6addrlbl_add; } return 0; err_ip6addrlbl_add: hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { hlist_del_rcu(&p->list); kfree_rcu(p, rcu); } return err; } static void __net_exit ip6addrlbl_net_exit(struct net *net) { struct ip6addrlbl_entry *p = NULL; struct hlist_node *n; /* Remove all labels belonging to the exiting net */ spin_lock(&net->ipv6.ip6addrlbl_table.lock); hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { hlist_del_rcu(&p->list); kfree_rcu(p, rcu); } spin_unlock(&net->ipv6.ip6addrlbl_table.lock); } static struct pernet_operations ipv6_addr_label_ops = { .init = ip6addrlbl_net_init, .exit = ip6addrlbl_net_exit, }; int __init ipv6_addr_label_init(void) { return register_pernet_subsys(&ipv6_addr_label_ops); } void ipv6_addr_label_cleanup(void) { unregister_pernet_subsys(&ipv6_addr_label_ops); } static const struct nla_policy ifal_policy[IFAL_MAX+1] = { [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, [IFAL_LABEL] = { .len = sizeof(u32), }, }; static bool addrlbl_ifindex_exists(struct net *net, int ifindex) { struct net_device *dev; rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); rcu_read_unlock(); return dev != NULL; } static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ifaddrlblmsg *ifal; struct nlattr *tb[IFAL_MAX+1]; struct in6_addr *pfx; u32 label; int err = 0; err = nlmsg_parse_deprecated(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, extack); if (err < 0) return err; ifal = nlmsg_data(nlh); if (ifal->ifal_family != AF_INET6 || ifal->ifal_prefixlen > 128) return -EINVAL; if (!tb[IFAL_ADDRESS]) return -EINVAL; pfx = nla_data(tb[IFAL_ADDRESS]); if (!tb[IFAL_LABEL]) return -EINVAL; label = nla_get_u32(tb[IFAL_LABEL]); if (label == IPV6_ADDR_LABEL_DEFAULT) return -EINVAL; switch (nlh->nlmsg_type) { case RTM_NEWADDRLABEL: if (ifal->ifal_index && !addrlbl_ifindex_exists(net, ifal->ifal_index)) return -EINVAL; err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen, ifal->ifal_index, label, nlh->nlmsg_flags & NLM_F_REPLACE); break; case RTM_DELADDRLABEL: err = ip6addrlbl_del(net, pfx, ifal->ifal_prefixlen, ifal->ifal_index); break; default: err = -EOPNOTSUPP; } return err; } static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, int prefixlen, int ifindex, u32 lseq) { struct ifaddrlblmsg *ifal = nlmsg_data(nlh); ifal->ifal_family = AF_INET6; ifal->__ifal_reserved = 0; ifal->ifal_prefixlen = prefixlen; ifal->ifal_flags = 0; ifal->ifal_index = ifindex; ifal->ifal_seq = lseq; }; static int ip6addrlbl_fill(struct sk_buff *skb, const struct ip6addrlbl_entry *p, u32 lseq, u32 portid, u32 seq, int event, unsigned int flags) { struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrlblmsg), flags); if (!nlh) return -EMSGSIZE; 
ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); if (nla_put_in6_addr(skb, IFAL_ADDRESS, &p->prefix) < 0 || nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { nlmsg_cancel(skb, nlh); return -EMSGSIZE; } nlmsg_end(skb, nlh); return 0; } static int ip6addrlbl_valid_dump_req(const struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct ifaddrlblmsg *ifal; ifal = nlmsg_payload(nlh, sizeof(*ifal)); if (!ifal) { NL_SET_ERR_MSG_MOD(extack, "Invalid header for address label dump request"); return -EINVAL; } if (ifal->__ifal_reserved || ifal->ifal_prefixlen || ifal->ifal_flags || ifal->ifal_index || ifal->ifal_seq) { NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for address label dump request"); return -EINVAL; } if (nlmsg_attrlen(nlh, sizeof(*ifal))) { NL_SET_ERR_MSG_MOD(extack, "Invalid data after header for address label dump request"); return -EINVAL; } return 0; } static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); struct ip6addrlbl_entry *p; int idx = 0, s_idx = cb->args[0]; int err = 0; u32 lseq; if (cb->strict_check) { err = ip6addrlbl_valid_dump_req(nlh, cb->extack); if (err < 0) return err; } rcu_read_lock(); lseq = READ_ONCE(net->ipv6.ip6addrlbl_table.seq); hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) { if (idx >= s_idx) { err = ip6addrlbl_fill(skb, p, lseq, NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, RTM_NEWADDRLABEL, NLM_F_MULTI); if (err < 0) break; } idx++; } rcu_read_unlock(); cb->args[0] = idx; return err; } static inline int ip6addrlbl_msgsize(void) { return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) + nla_total_size(16) /* IFAL_ADDRESS */ + nla_total_size(4); /* IFAL_LABEL */ } static int ip6addrlbl_valid_get_req(struct sk_buff *skb, const struct nlmsghdr *nlh, struct nlattr **tb, struct netlink_ext_ack *extack) { struct ifaddrlblmsg *ifal; int i, err; ifal = nlmsg_payload(nlh, sizeof(*ifal)); if (!ifal) { NL_SET_ERR_MSG_MOD(extack, "Invalid header for addrlabel get request"); return -EINVAL; } if (!netlink_strict_get_check(skb)) return nlmsg_parse_deprecated(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, extack); if (ifal->__ifal_reserved || ifal->ifal_flags || ifal->ifal_seq) { NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for addrlabel get request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy, extack); if (err) return err; for (i = 0; i <= IFAL_MAX; i++) { if (!tb[i]) continue; switch (i) { case IFAL_ADDRESS: break; default: NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in addrlabel get request"); return -EINVAL; } } return 0; } static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct ifaddrlblmsg *ifal; struct nlattr *tb[IFAL_MAX+1]; struct in6_addr *addr; u32 lseq; int err = 0; struct ip6addrlbl_entry *p; struct sk_buff *skb; err = ip6addrlbl_valid_get_req(in_skb, nlh, tb, extack); if (err < 0) return err; ifal = nlmsg_data(nlh); if (ifal->ifal_family != AF_INET6 || ifal->ifal_prefixlen != 128) return -EINVAL; if (ifal->ifal_index && !addrlbl_ifindex_exists(net, ifal->ifal_index)) return -EINVAL; if (!tb[IFAL_ADDRESS]) return -EINVAL; addr = nla_data(tb[IFAL_ADDRESS]); skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL); if (!skb) return -ENOBUFS; err = -ESRCH; rcu_read_lock(); p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); lseq = 
READ_ONCE(net->ipv6.ip6addrlbl_table.seq); if (p) err = ip6addrlbl_fill(skb, p, lseq, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWADDRLABEL, 0); rcu_read_unlock(); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(skb); } else { err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); } return err; } static const struct rtnl_msg_handler ipv6_adddr_label_rtnl_msg_handlers[] __initconst_or_module = { {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWADDRLABEL, .doit = ip6addrlbl_newdel, .flags = RTNL_FLAG_DOIT_UNLOCKED}, {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_DELADDRLABEL, .doit = ip6addrlbl_newdel, .flags = RTNL_FLAG_DOIT_UNLOCKED}, {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_GETADDRLABEL, .doit = ip6addrlbl_get, .dumpit = ip6addrlbl_dump, .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, }; int __init ipv6_addr_label_rtnl_register(void) { return rtnl_register_many(ipv6_adddr_label_rtnl_msg_handlers); }
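The RTM_NEWADDRLABEL/RTM_DELADDRLABEL handlers above are normally driven from userspace, for example by iproute2's "ip -6 addrlabel" command. The standalone sketch below (not part of the kernel file) shows, under the assumption of the standard rtnetlink message layout, how such a request is built: an ifaddrlblmsg header followed by IFAL_ADDRESS and IFAL_LABEL attributes, roughly what "ip -6 addrlabel add prefix 2001:db8::/32 label 99" would send. The prefix and label value are illustrative, ACK handling is omitted, and CAP_NET_ADMIN is required for the request to succeed.

#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/if_addrlabel.h>

int main(void)
{
	struct {
		struct nlmsghdr nlh;
		struct ifaddrlblmsg ifal;
		char attrs[64];
	} req;
	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
	struct in6_addr prefix;
	struct rtattr *rta;
	__u32 label = 99;	/* illustrative; the reserved default label is rejected */
	int fd;

	memset(&req, 0, sizeof(req));
	inet_pton(AF_INET6, "2001:db8::", &prefix);

	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifal));
	req.nlh.nlmsg_type = RTM_NEWADDRLABEL;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	req.ifal.ifal_family = AF_INET6;	/* checked by ip6addrlbl_newdel() */
	req.ifal.ifal_prefixlen = 32;

	/* IFAL_ADDRESS: the prefix, 16 bytes as required by ifal_policy */
	rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
	rta->rta_type = IFAL_ADDRESS;
	rta->rta_len = RTA_LENGTH(sizeof(prefix));
	memcpy(RTA_DATA(rta), &prefix, sizeof(prefix));
	req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + RTA_ALIGN(rta->rta_len);

	/* IFAL_LABEL: the 32-bit label to attach to the prefix */
	rta = (struct rtattr *)((char *)&req + NLMSG_ALIGN(req.nlh.nlmsg_len));
	rta->rta_type = IFAL_LABEL;
	rta->rta_len = RTA_LENGTH(sizeof(label));
	memcpy(RTA_DATA(rta), &label, sizeof(label));
	req.nlh.nlmsg_len = NLMSG_ALIGN(req.nlh.nlmsg_len) + RTA_ALIGN(rta->rta_len);

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0 || sendto(fd, &req, req.nlh.nlmsg_len, 0,
			     (struct sockaddr *)&kernel, sizeof(kernel)) < 0)
		perror("RTM_NEWADDRLABEL");
	/* A complete client would now read back the netlink ACK/error message. */
	close(fd);
	return 0;
}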
// SPDX-License-Identifier: GPL-2.0 /* * x86 single-step support code, common to 32-bit and 64-bit. */ #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <linux/mm.h> #include <linux/ptrace.h> #include <asm/desc.h> #include <asm/debugreg.h> #include <asm/mmu_context.h> unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) { unsigned long addr, seg; addr = regs->ip; seg = regs->cs; if (v8086_mode(regs)) { addr = (addr & 0xffff) + (seg << 4); return addr; } #ifdef CONFIG_MODIFY_LDT_SYSCALL /* * We'll assume that the code segments in the GDT * are all zero-based. That is largely true: the * TLS segments are used for data, and the PNPBIOS * and APM bios ones we just ignore here. */ if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) { struct desc_struct *desc; unsigned long base; seg >>= 3; mutex_lock(&child->mm->context.lock); if (unlikely(!child->mm->context.ldt || seg >= child->mm->context.ldt->nr_entries)) addr = -1L; /* bogus selector, access would fault */ else { desc = &child->mm->context.ldt->entries[seg]; base = get_desc_base(desc); /* 16-bit code segment? */ if (!desc->d) addr &= 0xffff; addr += base; } mutex_unlock(&child->mm->context.lock); } #endif return addr; } static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) { int i, copied; unsigned char opcode[15]; unsigned long addr = convert_ip_to_linear(child, regs); copied = access_process_vm(child, addr, opcode, sizeof(opcode), FOLL_FORCE); for (i = 0; i < copied; i++) { switch (opcode[i]) { /* popf and iret */ case 0x9d: case 0xcf: return 1; /* CHECKME: 64 65 */ /* opcode and address size prefixes */ case 0x66: case 0x67: continue; /* irrelevant prefixes (segment overrides and repeats) */ case 0x26: case 0x2e: case 0x36: case 0x3e: case 0x64: case 0x65: case 0xf0: case 0xf2: case 0xf3: continue; #ifdef CONFIG_X86_64 case 0x40 ... 0x4f: if (!user_64bit_mode(regs)) /* 32-bit mode: register increment */ return 0; /* 64-bit mode: REX prefix */ continue; #endif /* CHECKME: f2, f3 */ /* * pushf: NOTE! We should probably not let * the user see the TF bit being set. But * it's more pain than it's worth to avoid * it, and a debugger could emulate this * all in user space if it _really_ cares. */ case 0x9c: default: return 0; } } return 0; } /* * Enable single-stepping. Return nonzero if user mode is not using TF itself.
*/ static int enable_single_step(struct task_struct *child) { struct pt_regs *regs = task_pt_regs(child); unsigned long oflags; /* * If we stepped into a sysenter/syscall insn, it trapped in * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. * If user-mode had set TF itself, then it's still clear from * do_debug() and we need to set it again to restore the user * state so we don't wrongly set TIF_FORCED_TF below. * If enable_single_step() was used last and that is what * set TIF_SINGLESTEP, then both TF and TIF_FORCED_TF are * already set and our bookkeeping is fine. */ if (unlikely(test_tsk_thread_flag(child, TIF_SINGLESTEP))) regs->flags |= X86_EFLAGS_TF; /* * Always set TIF_SINGLESTEP. This will also * cause us to set TF when returning to user mode. */ set_tsk_thread_flag(child, TIF_SINGLESTEP); /* * Ensure that a trap is triggered once stepping out of a system * call prior to executing any user instruction. */ set_task_syscall_work(child, SYSCALL_EXIT_TRAP); oflags = regs->flags; /* Set TF on the kernel stack.. */ regs->flags |= X86_EFLAGS_TF; /* * ..but if TF is changed by the instruction we will trace, * don't mark it as being "us" that set it, so that we * won't clear it by hand later. * * Note that if we don't actually execute the popf because * of a signal arriving right now or suchlike, we will lose * track of the fact that it really was "us" that set it. */ if (is_setting_trap_flag(child, regs)) { clear_tsk_thread_flag(child, TIF_FORCED_TF); return 0; } /* * If TF was already set, check whether it was us who set it. * If not, we should never attempt a block step. */ if (oflags & X86_EFLAGS_TF) return test_tsk_thread_flag(child, TIF_FORCED_TF); set_tsk_thread_flag(child, TIF_FORCED_TF); return 1; } void set_task_blockstep(struct task_struct *task, bool on) { unsigned long debugctl; /* * Ensure irq/preemption can't change debugctl in between. * Note also that both TIF_BLOCKSTEP and debugctl should * be changed atomically wrt preemption. * * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if * task is current or it can't be running, otherwise we can race * with __switch_to_xtra(). We rely on ptrace_freeze_traced(). */ local_irq_disable(); debugctl = get_debugctlmsr(); if (on) { debugctl |= DEBUGCTLMSR_BTF; set_tsk_thread_flag(task, TIF_BLOCKSTEP); } else { debugctl &= ~DEBUGCTLMSR_BTF; clear_tsk_thread_flag(task, TIF_BLOCKSTEP); } if (task == current) update_debugctlmsr(debugctl); local_irq_enable(); } /* * Enable single or block step. */ static void enable_step(struct task_struct *child, bool block) { /* * Make sure block stepping (BTF) is not enabled unless it should be. * Note that we don't try to worry about any is_setting_trap_flag() * instructions after the first when using block stepping. * So no one should try to use debugger block stepping in a program * that uses user-mode single stepping itself. */ if (enable_single_step(child) && block) set_task_blockstep(child, true); else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) set_task_blockstep(child, false); } void user_enable_single_step(struct task_struct *child) { enable_step(child, 0); } void user_enable_block_step(struct task_struct *child) { enable_step(child, 1); } void user_disable_single_step(struct task_struct *child) { /* * Make sure block stepping (BTF) is disabled. */ if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) set_task_blockstep(child, false); /* Always clear TIF_SINGLESTEP... 
*/ clear_tsk_thread_flag(child, TIF_SINGLESTEP); clear_task_syscall_work(child, SYSCALL_EXIT_TRAP); /* But touch TF only if it was set by us.. */ if (test_and_clear_tsk_thread_flag(child, TIF_FORCED_TF)) task_pt_regs(child)->flags &= ~X86_EFLAGS_TF; }
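For context, the enable_step()/user_enable_single_step() machinery above is reached through the ptrace() system call. The sketch below is a minimal, illustrative tracer (not part of the kernel file): each PTRACE_SINGLESTEP request ends up in user_enable_single_step(), while PTRACE_SINGLEBLOCK (where supported) would take the user_enable_block_step()/DEBUGCTLMSR_BTF path instead. The traced program and step count are arbitrary, and error handling is trimmed.

#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t child = fork();

	if (child == 0) {
		/* Child: ask to be traced, then exec the program to be stepped. */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		execl("/bin/true", "true", (char *)NULL);
		_exit(1);
	}

	/* Parent: wait for the post-exec stop, then step a few instructions.
	 * Each PTRACE_SINGLESTEP request is what ultimately calls
	 * user_enable_single_step() on the child's task.
	 */
	int status;

	waitpid(child, &status, 0);
	for (int i = 0; i < 5 && WIFSTOPPED(status); i++) {
		ptrace(PTRACE_SINGLESTEP, child, NULL, NULL);
		waitpid(child, &status, 0);
	}
	ptrace(PTRACE_CONT, child, NULL, NULL);
	return 0;
}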
// SPDX-License-Identifier: GPL-2.0-or-later /** -*- linux-c -*- *********************************************************** * Linux PPP over Ethernet (PPPoX/PPPoE) Sockets * * PPPoX --- Generic PPP encapsulation socket family * PPPoE --- PPP over Ethernet (RFC 2516) * * Version: 0.7.0 * * 070228 : Fix to allow multiple sessions with same remote MAC and same * session id by including the local device ifindex in the * tuple identifying a session. This also ensures packets can't * be injected into a session from interfaces other than the one * specified by userspace. Florian Zumbiehl <florz@florz.de> * (Oh, BTW, this one is YYMMDD, in case you were wondering ...) * 220102 : Fix module use count on failure in pppoe_create, pppox_sk -acme * 030700 : Fixed connect logic to allow for disconnect. * 270700 : Fixed potential SMP problems; we must protect against * simultaneous invocation of ppp_input * and ppp_unregister_channel. * 040800 : Respect reference count mechanisms on net-devices. * 200800 : fix kfree(skb) in pppoe_rcv (acme) * Module reference count is decremented in the right spot now, * guards against sock_put not actually freeing the sk * in pppoe_release. * 051000 : Initialization cleanup. * 111100 : Fix recvmsg. * 050101 : Fix PADT processing. * 140501 : Use pppoe_rcv_core to handle all backlog. (Alexey) * 170701 : Do not lock_sock with rwlock held. (DaveM) * Ignore discovery frames if user has socket * locked. (DaveM) * Ignore return value of dev_queue_xmit in __pppoe_xmit * or else we may kfree an SKB twice. (DaveM) * 190701 : When doing copies of skb's in __pppoe_xmit, always delete * the original skb that was passed in on success, never on * failure. Delete the copy of the skb on failure to avoid * a memory leak. * 081001 : Misc. cleanup (licence string, non-blocking, prevent * reference of device on close).
* 121301 : New ppp channels interface; cannot unregister a channel * from interrupts. Thus, we mark the socket as a ZOMBIE * and do the unregistration later. * 081002 : seq_file support for proc stuff -acme * 111602 : Merge all 2.4 fixes into 2.5/2.6 tree. Label 2.5/2.6 * as version 0.7. Spacing cleanup. * Author: Michal Ostrowski <mostrows@speakeasy.net> * Contributors: * Arnaldo Carvalho de Melo <acme@conectiva.com.br> * David S. Miller (davem@redhat.com) * * License: */ #include <linux/string.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/net.h> #include <linux/inetdevice.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/if_ether.h> #include <linux/if_pppox.h> #include <linux/ppp_channel.h> #include <linux/ppp_defs.h> #include <linux/ppp-ioctl.h> #include <linux/notifier.h> #include <linux/file.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/nsproxy.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/sock.h> #include <linux/uaccess.h> #define PPPOE_HASH_BITS CONFIG_PPPOE_HASH_BITS #define PPPOE_HASH_SIZE (1 << PPPOE_HASH_BITS) #define PPPOE_HASH_MASK (PPPOE_HASH_SIZE - 1) static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb); static const struct proto_ops pppoe_ops; static const struct ppp_channel_ops pppoe_chan_ops; /* per-net private data for this module */ static unsigned int pppoe_net_id __read_mostly; struct pppoe_net { /* * we could use _single_ hash table for all * nets by injecting net id into the hash but * it would increase hash chains and add * a few additional math comparisons messy * as well, moreover in case of SMP less locking * controversy here */ struct pppox_sock __rcu *hash_table[PPPOE_HASH_SIZE]; spinlock_t hash_lock; }; /* * PPPoE could be in the following stages: * 1) Discovery stage (to obtain remote MAC and Session ID) * 2) Session stage (MAC and SID are known) * * Ethernet frames have a special tag for this but * we use simpler approach based on session id */ static inline bool stage_session(__be16 sid) { return sid != 0; } static inline struct pppoe_net *pppoe_pernet(struct net *net) { return net_generic(net, pppoe_net_id); } static inline int cmp_2_addr(struct pppoe_addr *a, struct pppoe_addr *b) { return a->sid == b->sid && ether_addr_equal(a->remote, b->remote); } static inline int cmp_addr(struct pppoe_addr *a, __be16 sid, char *addr) { return a->sid == sid && ether_addr_equal(a->remote, addr); } #if 8 % PPPOE_HASH_BITS #error 8 must be a multiple of PPPOE_HASH_BITS #endif static int hash_item(__be16 sid, unsigned char *addr) { unsigned char hash = 0; unsigned int i; for (i = 0; i < ETH_ALEN; i++) hash ^= addr[i]; for (i = 0; i < sizeof(sid_t) * 8; i += 8) hash ^= (__force __u32)sid >> i; for (i = 8; (i >>= 1) >= PPPOE_HASH_BITS;) hash ^= hash >> i; return hash & PPPOE_HASH_MASK; } /********************************************************************** * * Set/get/delete/rehash items (internal versions) * **********************************************************************/ static struct pppox_sock *__get_item(struct pppoe_net *pn, __be16 sid, unsigned char *addr, int ifindex) { int hash = hash_item(sid, addr); struct pppox_sock *ret; ret = rcu_dereference(pn->hash_table[hash]); while (ret) { if (cmp_addr(&ret->pppoe_pa, sid, addr) && ret->pppoe_ifindex == ifindex) return ret; ret = rcu_dereference(ret->next); } return NULL; } static int 
__set_item(struct pppoe_net *pn, struct pppox_sock *po) { int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); struct pppox_sock *ret, *first; first = rcu_dereference_protected(pn->hash_table[hash], lockdep_is_held(&pn->hash_lock)); ret = first; while (ret) { if (cmp_2_addr(&ret->pppoe_pa, &po->pppoe_pa) && ret->pppoe_ifindex == po->pppoe_ifindex) return -EALREADY; ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock)); } RCU_INIT_POINTER(po->next, first); rcu_assign_pointer(pn->hash_table[hash], po); return 0; } static void __delete_item(struct pppoe_net *pn, __be16 sid, char *addr, int ifindex) { int hash = hash_item(sid, addr); struct pppox_sock *ret, __rcu **src; ret = rcu_dereference_protected(pn->hash_table[hash], lockdep_is_held(&pn->hash_lock)); src = &pn->hash_table[hash]; while (ret) { if (cmp_addr(&ret->pppoe_pa, sid, addr) && ret->pppoe_ifindex == ifindex) { struct pppox_sock *next; next = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock)); rcu_assign_pointer(*src, next); break; } src = &ret->next; ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock)); } } /********************************************************************** * * Set/get/delete/rehash items * **********************************************************************/ static inline struct pppox_sock *get_item(struct pppoe_net *pn, __be16 sid, unsigned char *addr, int ifindex) { struct pppox_sock *po; po = __get_item(pn, sid, addr, ifindex); if (po && !refcount_inc_not_zero(&sk_pppox(po)->sk_refcnt)) po = NULL; return po; } static inline struct pppox_sock *__get_item_by_addr(struct net *net, struct sockaddr_pppox *sp) { struct net_device *dev; struct pppoe_net *pn; struct pppox_sock *pppox_sock = NULL; int ifindex; dev = dev_get_by_name_rcu(net, sp->sa_addr.pppoe.dev); if (dev) { ifindex = dev->ifindex; pn = pppoe_pernet(net); pppox_sock = __get_item(pn, sp->sa_addr.pppoe.sid, sp->sa_addr.pppoe.remote, ifindex); } return pppox_sock; } static inline void delete_item(struct pppoe_net *pn, __be16 sid, char *addr, int ifindex) { spin_lock(&pn->hash_lock); __delete_item(pn, sid, addr, ifindex); spin_unlock(&pn->hash_lock); } /*************************************************************************** * * Handler for device events. * Certain device events require that sockets be unconnected. * **************************************************************************/ static void pppoe_flush_dev(struct net_device *dev) { struct pppoe_net *pn; int i; pn = pppoe_pernet(dev_net(dev)); spin_lock(&pn->hash_lock); for (i = 0; i < PPPOE_HASH_SIZE; i++) { struct pppox_sock *po = rcu_dereference_protected(pn->hash_table[i], lockdep_is_held(&pn->hash_lock)); struct sock *sk; while (po) { while (po && po->pppoe_dev != dev) { po = rcu_dereference_protected(po->next, lockdep_is_held(&pn->hash_lock)); } if (!po) break; sk = sk_pppox(po); /* We always grab the socket lock, followed by the * hash_lock, in that order. Since we should hold the * sock lock while doing any unbinding, we need to * release the lock we're holding. Hold a reference to * the sock so it doesn't disappear as we're jumping * between locks. */ sock_hold(sk); spin_unlock(&pn->hash_lock); lock_sock(sk); if (po->pppoe_dev == dev && sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { pppox_unbind_sock(sk); sk->sk_state_change(sk); po->pppoe_dev = NULL; dev_put(dev); } release_sock(sk); sock_put(sk); /* Restart the process from the start of the current * hash chain. 
We dropped locks so the world may have * change from underneath us. */ BUG_ON(pppoe_pernet(dev_net(dev)) == NULL); spin_lock(&pn->hash_lock); po = rcu_dereference_protected(pn->hash_table[i], lockdep_is_held(&pn->hash_lock)); } } spin_unlock(&pn->hash_lock); } static int pppoe_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); /* Only look at sockets that are using this specific device. */ switch (event) { case NETDEV_CHANGEADDR: case NETDEV_CHANGEMTU: /* A change in mtu or address is a bad thing, requiring * LCP re-negotiation. */ case NETDEV_GOING_DOWN: case NETDEV_DOWN: /* Find every socket on this device and kill it. */ pppoe_flush_dev(dev); break; default: break; } return NOTIFY_DONE; } static struct notifier_block pppoe_notifier = { .notifier_call = pppoe_device_event, }; /************************************************************************ * * Do the real work of receiving a PPPoE Session frame. * ***********************************************************************/ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) { struct pppox_sock *po = pppox_sk(sk); struct pppox_sock *relay_po; /* Backlog receive. Semantics of backlog rcv preclude any code from * executing in lock_sock()/release_sock() bounds; meaning sk->sk_state * can't change. */ if (sk->sk_state & PPPOX_BOUND) { ppp_input(&po->chan, skb); } else if (sk->sk_state & PPPOX_RELAY) { relay_po = __get_item_by_addr(sock_net(sk), &po->pppoe_relay); if (relay_po == NULL) goto abort_kfree; if ((sk_pppox(relay_po)->sk_state & PPPOX_CONNECTED) == 0) goto abort_kfree; if (!__pppoe_xmit(sk_pppox(relay_po), skb)) goto abort_kfree; } else { if (sock_queue_rcv_skb(sk, skb)) goto abort_kfree; } return NET_RX_SUCCESS; abort_kfree: kfree_skb(skb); return NET_RX_DROP; } /************************************************************************ * * Receive wrapper called in BH context. * ***********************************************************************/ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct pppoe_hdr *ph; struct pppox_sock *po; struct pppoe_net *pn; int len; if (skb->pkt_type == PACKET_OTHERHOST) goto drop; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) goto out; if (skb_mac_header_len(skb) < ETH_HLEN) goto drop; if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) goto drop; ph = pppoe_hdr(skb); len = ntohs(ph->length); skb_pull_rcsum(skb, sizeof(*ph)); if (skb->len < len) goto drop; if (pskb_trim_rcsum(skb, len)) goto drop; ph = pppoe_hdr(skb); pn = pppoe_pernet(dev_net(dev)); po = __get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex); if (!po) goto drop; return __sk_receive_skb(sk_pppox(po), skb, 0, 1, false); drop: kfree_skb(skb); out: return NET_RX_DROP; } static void pppoe_unbind_sock_work(struct work_struct *work) { struct pppox_sock *po = container_of(work, struct pppox_sock, proto.pppoe.padt_work); struct sock *sk = sk_pppox(po); lock_sock(sk); if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } pppox_unbind_sock(sk); release_sock(sk); sock_put(sk); } /************************************************************************ * * Receive a PPPoE Discovery frame. 
* This is solely for detection of PADT frames * ***********************************************************************/ static int pppoe_disc_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct pppoe_hdr *ph; struct pppox_sock *po; struct pppoe_net *pn; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) goto out; if (skb->pkt_type != PACKET_HOST) goto abort; if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) goto abort; ph = pppoe_hdr(skb); if (ph->code != PADT_CODE) goto abort; pn = pppoe_pernet(dev_net(dev)); po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex); if (po) if (!schedule_work(&po->proto.pppoe.padt_work)) sock_put(sk_pppox(po)); abort: kfree_skb(skb); out: return NET_RX_SUCCESS; /* Lies... :-) */ } static struct packet_type pppoes_ptype __read_mostly = { .type = cpu_to_be16(ETH_P_PPP_SES), .func = pppoe_rcv, }; static struct packet_type pppoed_ptype __read_mostly = { .type = cpu_to_be16(ETH_P_PPP_DISC), .func = pppoe_disc_rcv, }; static struct proto pppoe_sk_proto __read_mostly = { .name = "PPPOE", .owner = THIS_MODULE, .obj_size = sizeof(struct pppox_sock), }; static void pppoe_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_receive_queue); } /*********************************************************************** * * Initialize a new struct sock. * **********************************************************************/ static int pppoe_create(struct net *net, struct socket *sock, int kern) { struct sock *sk; sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, kern); if (!sk) return -ENOMEM; sock_init_data(sock, sk); sock_set_flag(sk, SOCK_RCU_FREE); sock->state = SS_UNCONNECTED; sock->ops = &pppoe_ops; sk->sk_backlog_rcv = pppoe_rcv_core; sk->sk_destruct = pppoe_destruct; sk->sk_state = PPPOX_NONE; sk->sk_type = SOCK_STREAM; sk->sk_family = PF_PPPOX; sk->sk_protocol = PX_PROTO_OE; INIT_WORK(&pppox_sk(sk)->proto.pppoe.padt_work, pppoe_unbind_sock_work); return 0; } static int pppoe_release(struct socket *sock) { struct sock *sk = sock->sk; struct pppox_sock *po; struct pppoe_net *pn; struct net *net = NULL; if (!sk) return 0; lock_sock(sk); if (sock_flag(sk, SOCK_DEAD)) { release_sock(sk); return -EBADF; } po = pppox_sk(sk); if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } pppox_unbind_sock(sk); /* Signal the death of the socket. 
*/ sk->sk_state = PPPOX_DEAD; net = sock_net(sk); pn = pppoe_pernet(net); /* * protect "po" from concurrent updates * on pppoe_flush_dev */ delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote, po->pppoe_ifindex); sock_orphan(sk); sock->sk = NULL; release_sock(sk); sock_put(sk); return 0; } static int pppoe_connect(struct socket *sock, struct sockaddr_unsized *uservaddr, int sockaddr_len, int flags) { struct sock *sk = sock->sk; struct sockaddr_pppox *sp = (struct sockaddr_pppox *)uservaddr; struct pppox_sock *po = pppox_sk(sk); struct net_device *dev = NULL; struct pppoe_net *pn; struct net *net = NULL; int error; lock_sock(sk); error = -EINVAL; if (sockaddr_len != sizeof(struct sockaddr_pppox)) goto end; if (sp->sa_protocol != PX_PROTO_OE) goto end; /* Check for already bound sockets */ error = -EBUSY; if ((sk->sk_state & PPPOX_CONNECTED) && stage_session(sp->sa_addr.pppoe.sid)) goto end; /* Check for already disconnected sockets, on attempts to disconnect */ error = -EALREADY; if ((sk->sk_state & PPPOX_DEAD) && !stage_session(sp->sa_addr.pppoe.sid)) goto end; error = 0; /* Delete the old binding */ if (stage_session(po->pppoe_pa.sid)) { pppox_unbind_sock(sk); pn = pppoe_pernet(sock_net(sk)); delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote, po->pppoe_ifindex); if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } po->pppoe_ifindex = 0; memset(&po->pppoe_pa, 0, sizeof(po->pppoe_pa)); memset(&po->pppoe_relay, 0, sizeof(po->pppoe_relay)); memset(&po->chan, 0, sizeof(po->chan)); po->next = NULL; po->num = 0; sk->sk_state = PPPOX_NONE; } /* Re-bind in session stage only */ if (stage_session(sp->sa_addr.pppoe.sid)) { error = -ENODEV; net = sock_net(sk); dev = dev_get_by_name(net, sp->sa_addr.pppoe.dev); if (!dev) goto err_put; po->pppoe_dev = dev; po->pppoe_ifindex = dev->ifindex; pn = pppoe_pernet(net); if (!(dev->flags & IFF_UP)) { goto err_put; } memcpy(&po->pppoe_pa, &sp->sa_addr.pppoe, sizeof(struct pppoe_addr)); spin_lock(&pn->hash_lock); error = __set_item(pn, po); spin_unlock(&pn->hash_lock); if (error < 0) goto err_put; po->chan.hdrlen = (sizeof(struct pppoe_hdr) + dev->hard_header_len); po->chan.mtu = dev->mtu - sizeof(struct pppoe_hdr) - 2; po->chan.private = sk; po->chan.ops = &pppoe_chan_ops; po->chan.direct_xmit = true; error = ppp_register_net_channel(dev_net(dev), &po->chan); if (error) { delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote, po->pppoe_ifindex); goto err_put; } sk->sk_state = PPPOX_CONNECTED; } po->num = sp->sa_addr.pppoe.sid; end: release_sock(sk); return error; err_put: if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } goto end; } static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { int len = sizeof(struct sockaddr_pppox); struct sockaddr_pppox sp; sp.sa_family = AF_PPPOX; sp.sa_protocol = PX_PROTO_OE; memcpy(&sp.sa_addr.pppoe, &pppox_sk(sock->sk)->pppoe_pa, sizeof(struct pppoe_addr)); memcpy(uaddr, &sp, len); return len; } static int pppoe_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; struct pppox_sock *po = pppox_sk(sk); int val; int err; switch (cmd) { case PPPIOCGMRU: err = -ENXIO; if (!(sk->sk_state & PPPOX_CONNECTED)) break; err = -EFAULT; if (put_user(po->pppoe_dev->mtu - sizeof(struct pppoe_hdr) - PPP_HDRLEN, (int __user *)arg)) break; err = 0; break; case PPPIOCSMRU: err = -ENXIO; if (!(sk->sk_state & PPPOX_CONNECTED)) break; err = -EFAULT; if (get_user(val, (int __user *)arg)) break; if (val < (po->pppoe_dev->mtu - sizeof(struct 
pppoe_hdr) - PPP_HDRLEN)) err = 0; else err = -EINVAL; break; case PPPIOCSFLAGS: err = -EFAULT; if (get_user(val, (int __user *)arg)) break; err = 0; break; case PPPOEIOCSFWD: { struct pppox_sock *relay_po; err = -EBUSY; if (sk->sk_state & (PPPOX_BOUND | PPPOX_DEAD)) break; err = -ENOTCONN; if (!(sk->sk_state & PPPOX_CONNECTED)) break; /* PPPoE address from the user specifies an outbound PPPoE address which frames are forwarded to */ err = -EFAULT; if (copy_from_user(&po->pppoe_relay, (void __user *)arg, sizeof(struct sockaddr_pppox))) break; err = -EINVAL; if (po->pppoe_relay.sa_family != AF_PPPOX || po->pppoe_relay.sa_protocol != PX_PROTO_OE) break; /* Check that the socket referenced by the address actually exists. */ rcu_read_lock(); relay_po = __get_item_by_addr(sock_net(sk), &po->pppoe_relay); rcu_read_unlock(); if (!relay_po) break; sk->sk_state |= PPPOX_RELAY; err = 0; break; } case PPPOEIOCDFWD: err = -EALREADY; if (!(sk->sk_state & PPPOX_RELAY)) break; sk->sk_state &= ~PPPOX_RELAY; err = 0; break; default: err = -ENOTTY; } return err; } static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { struct sk_buff *skb; struct sock *sk = sock->sk; struct pppox_sock *po = pppox_sk(sk); int error; struct pppoe_hdr hdr; struct pppoe_hdr *ph; struct net_device *dev; char *start; int hlen; lock_sock(sk); if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) { error = -ENOTCONN; goto end; } hdr.ver = 1; hdr.type = 1; hdr.code = 0; hdr.sid = po->num; dev = po->pppoe_dev; error = -EMSGSIZE; if (total_len > (dev->mtu + dev->hard_header_len)) goto end; hlen = LL_RESERVED_SPACE(dev); skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len + dev->needed_tailroom, 0, GFP_KERNEL); if (!skb) { error = -ENOMEM; goto end; } /* Reserve space for headers. */ skb_reserve(skb, hlen); skb_reset_network_header(skb); skb->dev = dev; skb->priority = READ_ONCE(sk->sk_priority); skb->protocol = cpu_to_be16(ETH_P_PPP_SES); ph = skb_put(skb, total_len + sizeof(struct pppoe_hdr)); start = (char *)&ph->tag[0]; error = memcpy_from_msg(start, m, total_len); if (error < 0) { kfree_skb(skb); goto end; } error = total_len; dev_hard_header(skb, dev, ETH_P_PPP_SES, po->pppoe_pa.remote, NULL, total_len); memcpy(ph, &hdr, sizeof(struct pppoe_hdr)); ph->length = htons(total_len); dev_queue_xmit(skb); end: release_sock(sk); return error; } /************************************************************************ * * xmit function for internal use. * ***********************************************************************/ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) { struct pppox_sock *po = pppox_sk(sk); struct net_device *dev = po->pppoe_dev; struct pppoe_hdr *ph; int data_len = skb->len; /* The higher-level PPP code (ppp_unregister_channel()) ensures the PPP * xmit operations conclude prior to an unregistration call. Thus * sk->sk_state cannot change, so we don't need to do lock_sock(). * But, we also can't do a lock_sock since that introduces a potential * deadlock as we'd reverse the lock ordering used when calling * ppp_unregister_channel(). */ if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) goto abort; if (!dev) goto abort; /* Copy the data if there is no space for the header or if it's * read-only. 
*/ if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph))) goto abort; __skb_push(skb, sizeof(*ph)); skb_reset_network_header(skb); ph = pppoe_hdr(skb); ph->ver = 1; ph->type = 1; ph->code = 0; ph->sid = po->num; ph->length = htons(data_len); skb->protocol = cpu_to_be16(ETH_P_PPP_SES); skb->dev = dev; dev_hard_header(skb, dev, ETH_P_PPP_SES, po->pppoe_pa.remote, NULL, data_len); dev_queue_xmit(skb); return 1; abort: kfree_skb(skb); return 1; } /************************************************************************ * * xmit function called by generic PPP driver * sends PPP frame over PPPoE socket * ***********************************************************************/ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb) { struct sock *sk = chan->private; return __pppoe_xmit(sk, skb); } static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx, struct net_device_path *path, const struct ppp_channel *chan) { struct sock *sk = chan->private; struct pppox_sock *po = pppox_sk(sk); struct net_device *dev = po->pppoe_dev; if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED) || !dev) return -1; path->type = DEV_PATH_PPPOE; path->encap.proto = htons(ETH_P_PPP_SES); path->encap.id = be16_to_cpu(po->num); memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN); memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN); path->dev = ctx->dev; ctx->dev = dev; return 0; } static const struct ppp_channel_ops pppoe_chan_ops = { .start_xmit = pppoe_xmit, .fill_forward_path = pppoe_fill_forward_path, }; static int pppoe_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; int error = 0; if (sk->sk_state & PPPOX_BOUND) return -EIO; skb = skb_recv_datagram(sk, flags, &error); if (!skb) return error; total_len = min_t(size_t, total_len, skb->len); error = skb_copy_datagram_msg(skb, 0, m, total_len); if (error == 0) { consume_skb(skb); return total_len; } kfree_skb(skb); return error; } #ifdef CONFIG_PROC_FS static int pppoe_seq_show(struct seq_file *seq, void *v) { struct pppox_sock *po; char *dev_name; if (v == SEQ_START_TOKEN) { seq_puts(seq, "Id Address Device\n"); goto out; } po = v; dev_name = po->pppoe_pa.dev; seq_printf(seq, "%08X %pM %8s\n", po->pppoe_pa.sid, po->pppoe_pa.remote, dev_name); out: return 0; } static inline struct pppox_sock *pppoe_get_idx(struct pppoe_net *pn, loff_t pos) { struct pppox_sock *po; int i; for (i = 0; i < PPPOE_HASH_SIZE; i++) { po = rcu_dereference(pn->hash_table[i]); while (po) { if (!pos--) goto out; po = rcu_dereference(po->next); } } out: return po; } static void *pppoe_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); loff_t l = *pos; rcu_read_lock(); return l ? 
pppoe_get_idx(pn, --l) : SEQ_START_TOKEN; } static void *pppoe_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); struct pppox_sock *po, *next; ++*pos; if (v == SEQ_START_TOKEN) { po = pppoe_get_idx(pn, 0); goto out; } po = v; next = rcu_dereference(po->next); if (next) po = next; else { int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); po = NULL; while (++hash < PPPOE_HASH_SIZE) { po = rcu_dereference(pn->hash_table[hash]); if (po) break; } } out: return po; } static void pppoe_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static const struct seq_operations pppoe_seq_ops = { .start = pppoe_seq_start, .next = pppoe_seq_next, .stop = pppoe_seq_stop, .show = pppoe_seq_show, }; #endif /* CONFIG_PROC_FS */ static const struct proto_ops pppoe_ops = { .family = AF_PPPOX, .owner = THIS_MODULE, .release = pppoe_release, .bind = sock_no_bind, .connect = pppoe_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = pppoe_getname, .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .sendmsg = pppoe_sendmsg, .recvmsg = pppoe_recvmsg, .mmap = sock_no_mmap, .ioctl = pppox_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = pppox_compat_ioctl, #endif }; static const struct pppox_proto pppoe_proto = { .create = pppoe_create, .ioctl = pppoe_ioctl, .owner = THIS_MODULE, }; static __net_init int pppoe_init_net(struct net *net) { struct pppoe_net *pn = pppoe_pernet(net); struct proc_dir_entry *pde; spin_lock_init(&pn->hash_lock); pde = proc_create_net("pppoe", 0444, net->proc_net, &pppoe_seq_ops, sizeof(struct seq_net_private)); #ifdef CONFIG_PROC_FS if (!pde) return -ENOMEM; #endif return 0; } static __net_exit void pppoe_exit_net(struct net *net) { remove_proc_entry("pppoe", net->proc_net); } static struct pernet_operations pppoe_net_ops = { .init = pppoe_init_net, .exit = pppoe_exit_net, .id = &pppoe_net_id, .size = sizeof(struct pppoe_net), }; static int __init pppoe_init(void) { int err; err = register_pernet_device(&pppoe_net_ops); if (err) goto out; err = proto_register(&pppoe_sk_proto, 0); if (err) goto out_unregister_net_ops; err = register_pppox_proto(PX_PROTO_OE, &pppoe_proto); if (err) goto out_unregister_pppoe_proto; dev_add_pack(&pppoes_ptype); dev_add_pack(&pppoed_ptype); register_netdevice_notifier(&pppoe_notifier); return 0; out_unregister_pppoe_proto: proto_unregister(&pppoe_sk_proto); out_unregister_net_ops: unregister_pernet_device(&pppoe_net_ops); out: return err; } static void __exit pppoe_exit(void) { unregister_netdevice_notifier(&pppoe_notifier); dev_remove_pack(&pppoed_ptype); dev_remove_pack(&pppoes_ptype); unregister_pppox_proto(PX_PROTO_OE); proto_unregister(&pppoe_sk_proto); unregister_pernet_device(&pppoe_net_ops); } module_init(pppoe_init); module_exit(pppoe_exit); MODULE_AUTHOR("Michal Ostrowski <mostrows@speakeasy.net>"); MODULE_DESCRIPTION("PPP over Ethernet driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OE);
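A PPPoE control daemon reaches pppoe_connect() above by calling connect() on a PX_PROTO_OE socket once the discovery exchange (PADI/PADO/PADR/PADS) has produced a session id and peer MAC. The sketch below is illustrative only: the session id, MAC address and device name are placeholders, and a real daemon (such as pppd with its PPPoE plugin) would obtain them from discovery before binding the session.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/if_pppox.h>

int main(void)
{
	struct sockaddr_pppox sp;
	int fd;

	fd = socket(AF_PPPOX, SOCK_STREAM, PX_PROTO_OE);
	if (fd < 0) {
		perror("socket(AF_PPPOX, PX_PROTO_OE)");
		return 1;
	}

	memset(&sp, 0, sizeof(sp));
	sp.sa_family = AF_PPPOX;
	sp.sa_protocol = PX_PROTO_OE;
	sp.sa_addr.pppoe.sid = htons(0x1234);		/* placeholder: session id from PADS */
	memcpy(sp.sa_addr.pppoe.remote,			/* placeholder: access concentrator MAC */
	       "\x00\x11\x22\x33\x44\x55", ETH_ALEN);
	strncpy(sp.sa_addr.pppoe.dev, "eth0",		/* underlying Ethernet device */
		sizeof(sp.sa_addr.pppoe.dev) - 1);

	/* Handled by pppoe_connect(): hashes the (sid, MAC, ifindex) tuple,
	 * registers the PPP channel, and fails with ENODEV if "eth0" is absent. */
	if (connect(fd, (struct sockaddr *)&sp, sizeof(sp)) < 0)
		perror("connect(PX_PROTO_OE)");

	close(fd);
	return 0;
}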
3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 
// SPDX-License-Identifier: GPL-2.0-only
/*
 * xfrm_policy.c
 *
 * Changes:
 *	Mitsuru KANDA @USAGI
 *	Kazunori MIYAZAWA @USAGI
 *	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 *		IPv6 support
 *	Kazunori MIYAZAWA @USAGI
 *	YOSHIFUJI Hideaki
 *		Split up af-specific portion
 *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
 *
 */

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/module.h>
#include <linux/cache.h>
#include <linux/cpu.h>
#include <linux/audit.h>
#include <linux/rhashtable.h>
#include <linux/if_tunnel.h>
#include <linux/icmp.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/gre.h>
#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/mip6.h>
#endif
#ifdef CONFIG_XFRM_STATISTICS
#include <net/snmp.h>
#endif
#ifdef CONFIG_XFRM_ESPINTCP
#include <net/espintcp.h>
#endif
#include <net/inet_dscp.h>

#include "xfrm_hash.h"

#define XFRM_QUEUE_TMO_MIN	((unsigned)(HZ/10))
#define XFRM_QUEUE_TMO_MAX	((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN	100

struct xfrm_flo {
	struct dst_entry *dst_orig;
	u8 flags;
};

/* prefixes smaller than this are stored in lists, not trees. */
#define INEXACT_PREFIXLEN_IPV4	16
#define INEXACT_PREFIXLEN_IPV6	48

struct xfrm_pol_inexact_node {
	struct rb_node node;
	union {
		xfrm_address_t addr;
		struct rcu_head rcu;
	};
	u8 prefixlen;

	struct rb_root root;

	/* the policies matching this node, can be empty list */
	struct hlist_head hhead;
};

/* xfrm inexact policy search tree:
 * xfrm_pol_inexact_bin = hash(dir,type,family,if_id);
 *  |
 * +---- root_d: sorted by daddr:prefix
 * |                 |
 * |        xfrm_pol_inexact_node
 * |                 |
 * |                 +- root: sorted by saddr/prefix
 * |                 |              |
 * |                 |         xfrm_pol_inexact_node
 * |                 |              |
 * |                 |              + root: unused
 * |                 |              |
 * |                 |              + hhead: saddr:daddr policies
 * |                 |
 * |                 +- coarse policies and all any:daddr policies
 * |
 * +---- root_s: sorted by saddr:prefix
 * |                 |
 * |        xfrm_pol_inexact_node
 * |                 |
 * |                 + root: unused
 * |                 |
 * |                 + hhead: saddr:any policies
 * |
 * +---- coarse policies and all any:any policies
 *
 * Lookups return four candidate lists:
 * 1. any:any list from top-level xfrm_pol_inexact_bin
 * 2. any:daddr list from daddr tree
 * 3. saddr:daddr list from 2nd level daddr tree
 * 4. saddr:any list from saddr tree
 *
 * This result set then needs to be searched for the policy with
 * the lowest priority. If two candidates have the same priority, the
 * struct xfrm_policy pos member with the lower number is used.
 *
 * This replicates previous single-list-search algorithm which would
 * return first matching policy in the (ordered-by-priority) list.
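 *
 * A minimal sketch of that selection rule (illustrative only: the real
 * walk is xfrm_policy_eval_candidates() further down, and "cand" /
 * "best_of" are made-up names, not kernel API).  Assume each of the four
 * lists has already been reduced to its first policy whose selector
 * matches the flow:
 *
 *	struct cand { u32 priority; u32 pos; };
 *
 *	static const struct cand *best_of(const struct cand *a,
 *					  const struct cand *b)
 *	{
 *		if (!a || !b)
 *			return a ? a : b;
 *		if (a->priority != b->priority)
 *			return a->priority < b->priority ? a : b;
 *		return a->pos < b->pos ? a : b;	// lower pos == inserted earlier
 *	}
 *
 * Folding best_of() over the matches from all four lists yields the same
 * policy the old single ordered list would have returned first.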
*/ struct xfrm_pol_inexact_key { possible_net_t net; u32 if_id; u16 family; u8 dir, type; }; struct xfrm_pol_inexact_bin { struct xfrm_pol_inexact_key k; struct rhash_head head; /* list containing '*:*' policies */ struct hlist_head hhead; seqcount_spinlock_t count; /* tree sorted by daddr/prefix */ struct rb_root root_d; /* tree sorted by saddr/prefix */ struct rb_root root_s; /* slow path below */ struct list_head inexact_bins; struct rcu_head rcu; }; enum xfrm_pol_inexact_candidate_type { XFRM_POL_CAND_BOTH, XFRM_POL_CAND_SADDR, XFRM_POL_CAND_DADDR, XFRM_POL_CAND_ANY, XFRM_POL_CAND_MAX, }; struct xfrm_pol_inexact_candidates { struct hlist_head *res[XFRM_POL_CAND_MAX]; }; struct xfrm_flow_keys { struct flow_dissector_key_basic basic; struct flow_dissector_key_control control; union { struct flow_dissector_key_ipv4_addrs ipv4; struct flow_dissector_key_ipv6_addrs ipv6; } addrs; struct flow_dissector_key_ip ip; struct flow_dissector_key_icmp icmp; struct flow_dissector_key_ports ports; struct flow_dissector_key_keyid gre; }; static struct flow_dissector xfrm_session_dissector __ro_after_init; static DEFINE_SPINLOCK(xfrm_if_cb_lock); static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly; static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; static struct kmem_cache *xfrm_dst_cache __ro_after_init; static struct rhashtable xfrm_policy_inexact_table; static const struct rhashtable_params xfrm_pol_inexact_params; static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr); static int stale_bundle(struct dst_entry *dst); static int xfrm_bundle_ok(struct xfrm_dst *xdst); static void xfrm_policy_queue_process(struct timer_list *t); static void __xfrm_policy_link(struct xfrm_policy *pol, int dir); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir); static struct xfrm_pol_inexact_bin * xfrm_policy_inexact_lookup(struct net *net, u8 type, u16 family, u8 dir, u32 if_id); static struct xfrm_pol_inexact_bin * xfrm_policy_inexact_lookup_rcu(struct net *net, u8 type, u16 family, u8 dir, u32 if_id); static struct xfrm_policy * xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy, bool excl); static bool xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand, struct xfrm_pol_inexact_bin *b, const xfrm_address_t *saddr, const xfrm_address_t *daddr); static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy) { return refcount_inc_not_zero(&policy->refcnt); } static inline bool __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { const struct flowi4 *fl4 = &fl->u.ip4; return addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) && addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) && !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) && !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) && (fl4->flowi4_proto == sel->proto || !sel->proto) && (fl4->flowi4_oif == sel->ifindex || !sel->ifindex); } static inline bool __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { const struct flowi6 *fl6 = &fl->u.ip6; return addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) && addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) && !((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) && !((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) && (fl6->flowi6_proto == sel->proto || !sel->proto) && (fl6->flowi6_oif 
== sel->ifindex || !sel->ifindex); } bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, unsigned short family) { switch (family) { case AF_INET: return __xfrm4_selector_match(sel, fl); case AF_INET6: return __xfrm6_selector_match(sel, fl); } return false; } static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) { const struct xfrm_policy_afinfo *afinfo; if (unlikely(family >= ARRAY_SIZE(xfrm_policy_afinfo))) return NULL; rcu_read_lock(); afinfo = rcu_dereference(xfrm_policy_afinfo[family]); if (unlikely(!afinfo)) rcu_read_unlock(); return afinfo; } /* Called with rcu_read_lock(). */ static const struct xfrm_if_cb *xfrm_if_get_cb(void) { return rcu_dereference(xfrm_if_cb); } struct dst_entry *__xfrm_dst_lookup(int family, const struct xfrm_dst_lookup_params *params) { const struct xfrm_policy_afinfo *afinfo; struct dst_entry *dst; afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL)) return ERR_PTR(-EAFNOSUPPORT); dst = afinfo->dst_lookup(params); rcu_read_unlock(); return dst; } EXPORT_SYMBOL(__xfrm_dst_lookup); static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, dscp_t dscp, int oif, xfrm_address_t *prev_saddr, xfrm_address_t *prev_daddr, int family, u32 mark) { struct xfrm_dst_lookup_params params; struct net *net = xs_net(x); xfrm_address_t *saddr = &x->props.saddr; xfrm_address_t *daddr = &x->id.daddr; struct dst_entry *dst; if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) { saddr = x->coaddr; daddr = prev_daddr; } if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) { saddr = prev_saddr; daddr = x->coaddr; } params.net = net; params.saddr = saddr; params.daddr = daddr; params.dscp = dscp; params.oif = oif; params.mark = mark; params.ipproto = x->id.proto; if (x->encap) { switch (x->encap->encap_type) { case UDP_ENCAP_ESPINUDP: params.ipproto = IPPROTO_UDP; params.uli.ports.sport = x->encap->encap_sport; params.uli.ports.dport = x->encap->encap_dport; break; case TCP_ENCAP_ESPINTCP: params.ipproto = IPPROTO_TCP; params.uli.ports.sport = x->encap->encap_sport; params.uli.ports.dport = x->encap->encap_dport; break; } } dst = __xfrm_dst_lookup(family, &params); if (!IS_ERR(dst)) { if (prev_saddr != saddr) memcpy(prev_saddr, saddr, sizeof(*prev_saddr)); if (prev_daddr != daddr) memcpy(prev_daddr, daddr, sizeof(*prev_daddr)); } return dst; } static inline unsigned long make_jiffies(long secs) { if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) return MAX_SCHEDULE_TIMEOUT-1; else return secs*HZ; } static void xfrm_policy_timer(struct timer_list *t) { struct xfrm_policy *xp = timer_container_of(xp, t, timer); time64_t now = ktime_get_real_seconds(); time64_t next = TIME64_MAX; int warn = 0; int dir; read_lock(&xp->lock); if (unlikely(xp->walk.dead)) goto out; dir = xfrm_policy_id2dir(xp->index); if (xp->lft.hard_add_expires_seconds) { time64_t tmo = xp->lft.hard_add_expires_seconds + xp->curlft.add_time - now; if (tmo <= 0) goto expired; if (tmo < next) next = tmo; } if (xp->lft.hard_use_expires_seconds) { time64_t tmo = xp->lft.hard_use_expires_seconds + (READ_ONCE(xp->curlft.use_time) ? : xp->curlft.add_time) - now; if (tmo <= 0) goto expired; if (tmo < next) next = tmo; } if (xp->lft.soft_add_expires_seconds) { time64_t tmo = xp->lft.soft_add_expires_seconds + xp->curlft.add_time - now; if (tmo <= 0) { warn = 1; tmo = XFRM_KM_TIMEOUT; } if (tmo < next) next = tmo; } if (xp->lft.soft_use_expires_seconds) { time64_t tmo = xp->lft.soft_use_expires_seconds + (READ_ONCE(xp->curlft.use_time) ? 
: xp->curlft.add_time) - now; if (tmo <= 0) { warn = 1; tmo = XFRM_KM_TIMEOUT; } if (tmo < next) next = tmo; } if (warn) km_policy_expired(xp, dir, 0, 0); if (next != TIME64_MAX && !mod_timer(&xp->timer, jiffies + make_jiffies(next))) xfrm_pol_hold(xp); out: read_unlock(&xp->lock); xfrm_pol_put(xp); return; expired: read_unlock(&xp->lock); if (!xfrm_policy_delete(xp, dir)) km_policy_expired(xp, dir, 1, 0); xfrm_pol_put(xp); } /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 * SPD calls. */ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) { struct xfrm_policy *policy; policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { write_pnet(&policy->xp_net, net); INIT_LIST_HEAD(&policy->walk.all); INIT_HLIST_HEAD(&policy->state_cache_list); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); refcount_set(&policy->refcnt, 1); skb_queue_head_init(&policy->polq.hold_queue); timer_setup(&policy->timer, xfrm_policy_timer, 0); timer_setup(&policy->polq.hold_timer, xfrm_policy_queue_process, 0); } return policy; } EXPORT_SYMBOL(xfrm_policy_alloc); static void xfrm_policy_destroy_rcu(struct rcu_head *head) { struct xfrm_policy *policy = container_of(head, struct xfrm_policy, rcu); security_xfrm_policy_free(policy->security); kfree(policy); } /* Destroy xfrm_policy: descendant resources must be released to this moment. */ void xfrm_policy_destroy(struct xfrm_policy *policy) { BUG_ON(!policy->walk.dead); if (timer_delete(&policy->timer) || timer_delete(&policy->polq.hold_timer)) BUG(); xfrm_dev_policy_free(policy); call_rcu(&policy->rcu, xfrm_policy_destroy_rcu); } EXPORT_SYMBOL(xfrm_policy_destroy); /* Rule must be locked. Release descendant resources, announce * entry dead. The rule must be unlinked from lists to the moment. 
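 *
 * Note on reference counting vs. the two timers: every pending timer owns
 * one reference on the policy.  Arming uses the pattern seen throughout
 * this file (schematic; "expires" stands for whichever deadline applies):
 *
 *	if (!mod_timer(&pol->timer, expires))	// 0: timer was not pending
 *		xfrm_pol_hold(pol);		// so take the ref it will own
 *
 * and the kill path below does the inverse: it drops that reference only
 * when timer_delete() reports the timer was still pending.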
*/ static void xfrm_policy_kill(struct xfrm_policy *policy) { struct net *net = xp_net(policy); struct xfrm_state *x; xfrm_dev_policy_delete(policy); write_lock_bh(&policy->lock); policy->walk.dead = 1; write_unlock_bh(&policy->lock); atomic_inc(&policy->genid); if (timer_delete(&policy->polq.hold_timer)) xfrm_pol_put(policy); skb_queue_purge(&policy->polq.hold_queue); if (timer_delete(&policy->timer)) xfrm_pol_put(policy); /* XXX: Flush state cache */ spin_lock_bh(&net->xfrm.xfrm_state_lock); hlist_for_each_entry_rcu(x, &policy->state_cache_list, state_cache) { hlist_del_init_rcu(&x->state_cache); } spin_unlock_bh(&net->xfrm.xfrm_state_lock); xfrm_pol_put(policy); } static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; static inline unsigned int idx_hash(struct net *net, u32 index) { return __idx_hash(index, net->xfrm.policy_idx_hmask); } /* calculate policy hash thresholds */ static void __get_hash_thresh(struct net *net, unsigned short family, int dir, u8 *dbits, u8 *sbits) { switch (family) { case AF_INET: *dbits = net->xfrm.policy_bydst[dir].dbits4; *sbits = net->xfrm.policy_bydst[dir].sbits4; break; case AF_INET6: *dbits = net->xfrm.policy_bydst[dir].dbits6; *sbits = net->xfrm.policy_bydst[dir].sbits6; break; default: *dbits = 0; *sbits = 0; } } static struct hlist_head *policy_hash_bysel(struct net *net, const struct xfrm_selector *sel, unsigned short family, int dir) { unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hash; u8 dbits; u8 sbits; __get_hash_thresh(net, family, dir, &dbits, &sbits); hash = __sel_hash(sel, family, hmask, dbits, sbits); if (hash == hmask + 1) return NULL; return rcu_dereference_check(net->xfrm.policy_bydst[dir].table, lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash; } static struct hlist_head *policy_hash_direct(struct net *net, const xfrm_address_t *daddr, const xfrm_address_t *saddr, unsigned short family, int dir) { unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hash; u8 dbits; u8 sbits; __get_hash_thresh(net, family, dir, &dbits, &sbits); hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits); return rcu_dereference_check(net->xfrm.policy_bydst[dir].table, lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash; } static void xfrm_dst_hash_transfer(struct net *net, struct hlist_head *list, struct hlist_head *ndsttable, unsigned int nhashmask, int dir) { struct hlist_node *tmp, *entry0 = NULL; struct xfrm_policy *pol; unsigned int h0 = 0; u8 dbits; u8 sbits; redo: hlist_for_each_entry_safe(pol, tmp, list, bydst) { unsigned int h; __get_hash_thresh(net, pol->family, dir, &dbits, &sbits); h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, pol->family, nhashmask, dbits, sbits); if (!entry0 || pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) { hlist_del_rcu(&pol->bydst); hlist_add_head_rcu(&pol->bydst, ndsttable + h); h0 = h; } else { if (h != h0) continue; hlist_del_rcu(&pol->bydst); hlist_add_behind_rcu(&pol->bydst, entry0); } entry0 = &pol->bydst; } if (!hlist_empty(list)) { entry0 = NULL; goto redo; } } static void xfrm_idx_hash_transfer(struct hlist_head *list, struct hlist_head *nidxtable, unsigned int nhashmask) { struct hlist_node *tmp; struct xfrm_policy *pol; hlist_for_each_entry_safe(pol, tmp, list, byidx) { unsigned int h; h = __idx_hash(pol->index, nhashmask); hlist_add_head(&pol->byidx, nidxtable+h); } } static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) { return ((old_hmask + 1) << 1) - 1; } static void xfrm_bydst_resize(struct net *net, int dir) { unsigned 
int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); struct hlist_head *ndst = xfrm_hash_alloc(nsize); struct hlist_head *odst; int i; if (!ndst) return; spin_lock_bh(&net->xfrm.xfrm_policy_lock); write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, lockdep_is_held(&net->xfrm.xfrm_policy_lock)); for (i = hmask; i >= 0; i--) xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir); rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst); net->xfrm.policy_bydst[dir].hmask = nhashmask; write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); synchronize_rcu(); xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); } static void xfrm_byidx_resize(struct net *net) { unsigned int hmask = net->xfrm.policy_idx_hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); struct hlist_head *oidx = net->xfrm.policy_byidx; struct hlist_head *nidx = xfrm_hash_alloc(nsize); int i; if (!nidx) return; spin_lock_bh(&net->xfrm.xfrm_policy_lock); for (i = hmask; i >= 0; i--) xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); net->xfrm.policy_byidx = nidx; net->xfrm.policy_idx_hmask = nhashmask; spin_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); } static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total) { unsigned int cnt = net->xfrm.policy_count[dir]; unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; if (total) *total += cnt; if ((hmask + 1) < xfrm_policy_hashmax && cnt > hmask) return 1; return 0; } static inline int xfrm_byidx_should_resize(struct net *net, int total) { unsigned int hmask = net->xfrm.policy_idx_hmask; if ((hmask + 1) < xfrm_policy_hashmax && total > hmask) return 1; return 0; } void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) { si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; si->spdhcnt = net->xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; } EXPORT_SYMBOL(xfrm_spd_getinfo); static DEFINE_MUTEX(hash_resize_mutex); static void xfrm_hash_resize(struct work_struct *work) { struct net *net = container_of(work, struct net, xfrm.policy_hash_work); int dir, total; mutex_lock(&hash_resize_mutex); total = 0; for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { if (xfrm_bydst_should_resize(net, dir, &total)) xfrm_bydst_resize(net, dir); } if (xfrm_byidx_should_resize(net, total)) xfrm_byidx_resize(net); mutex_unlock(&hash_resize_mutex); } /* Make sure *pol can be inserted into fastbin. * Useful to check that later insert requests will be successful * (provided xfrm_policy_lock is held throughout). 
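 *
 * Bins are keyed by (net, if_id, family, dir, type); xfrm_pol_bin_key()
 * further down packs the three small fields into one word before hashing,
 * e.g. (hypothetical values, for illustration only):
 *
 *	type = XFRM_POLICY_TYPE_MAIN (0), dir = XFRM_POLICY_OUT (1),
 *	family = AF_INET (2):
 *
 *	a    = 0 << 24 | 1 << 16 | 2;	// 0x00010002
 *	hash = jhash_3words(a, if_id, net_hash_mix(net), seed);
 *
 * so two otherwise identical policies that differ only in if_id (or in
 * dir, type or family) are placed in distinct bins.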
*/ static struct xfrm_pol_inexact_bin * xfrm_policy_inexact_alloc_bin(const struct xfrm_policy *pol, u8 dir) { struct xfrm_pol_inexact_bin *bin, *prev; struct xfrm_pol_inexact_key k = { .family = pol->family, .type = pol->type, .dir = dir, .if_id = pol->if_id, }; struct net *net = xp_net(pol); lockdep_assert_held(&net->xfrm.xfrm_policy_lock); write_pnet(&k.net, net); bin = rhashtable_lookup_fast(&xfrm_policy_inexact_table, &k, xfrm_pol_inexact_params); if (bin) return bin; bin = kzalloc(sizeof(*bin), GFP_ATOMIC); if (!bin) return NULL; bin->k = k; INIT_HLIST_HEAD(&bin->hhead); bin->root_d = RB_ROOT; bin->root_s = RB_ROOT; seqcount_spinlock_init(&bin->count, &net->xfrm.xfrm_policy_lock); prev = rhashtable_lookup_get_insert_key(&xfrm_policy_inexact_table, &bin->k, &bin->head, xfrm_pol_inexact_params); if (!prev) { list_add(&bin->inexact_bins, &net->xfrm.inexact_bins); return bin; } kfree(bin); return IS_ERR(prev) ? NULL : prev; } static bool xfrm_pol_inexact_addr_use_any_list(const xfrm_address_t *addr, int family, u8 prefixlen) { if (xfrm_addr_any(addr, family)) return true; if (family == AF_INET6 && prefixlen < INEXACT_PREFIXLEN_IPV6) return true; if (family == AF_INET && prefixlen < INEXACT_PREFIXLEN_IPV4) return true; return false; } static bool xfrm_policy_inexact_insert_use_any_list(const struct xfrm_policy *policy) { const xfrm_address_t *addr; bool saddr_any, daddr_any; u8 prefixlen; addr = &policy->selector.saddr; prefixlen = policy->selector.prefixlen_s; saddr_any = xfrm_pol_inexact_addr_use_any_list(addr, policy->family, prefixlen); addr = &policy->selector.daddr; prefixlen = policy->selector.prefixlen_d; daddr_any = xfrm_pol_inexact_addr_use_any_list(addr, policy->family, prefixlen); return saddr_any && daddr_any; } static void xfrm_pol_inexact_node_init(struct xfrm_pol_inexact_node *node, const xfrm_address_t *addr, u8 prefixlen) { node->addr = *addr; node->prefixlen = prefixlen; } static struct xfrm_pol_inexact_node * xfrm_pol_inexact_node_alloc(const xfrm_address_t *addr, u8 prefixlen) { struct xfrm_pol_inexact_node *node; node = kzalloc(sizeof(*node), GFP_ATOMIC); if (node) xfrm_pol_inexact_node_init(node, addr, prefixlen); return node; } static int xfrm_policy_addr_delta(const xfrm_address_t *a, const xfrm_address_t *b, u8 prefixlen, u16 family) { u32 ma, mb, mask; unsigned int pdw, pbi; int delta = 0; switch (family) { case AF_INET: if (prefixlen == 0) return 0; mask = ~0U << (32 - prefixlen); ma = ntohl(a->a4) & mask; mb = ntohl(b->a4) & mask; if (ma < mb) delta = -1; else if (ma > mb) delta = 1; break; case AF_INET6: pdw = prefixlen >> 5; pbi = prefixlen & 0x1f; if (pdw) { delta = memcmp(a->a6, b->a6, pdw << 2); if (delta) return delta; } if (pbi) { mask = ~0U << (32 - pbi); ma = ntohl(a->a6[pdw]) & mask; mb = ntohl(b->a6[pdw]) & mask; if (ma < mb) delta = -1; else if (ma > mb) delta = 1; } break; default: break; } return delta; } static void xfrm_policy_inexact_list_reinsert(struct net *net, struct xfrm_pol_inexact_node *n, u16 family) { unsigned int matched_s, matched_d; struct xfrm_policy *policy, *p; matched_s = 0; matched_d = 0; list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { struct hlist_node *newpos = NULL; bool matches_s, matches_d; if (policy->walk.dead || !policy->bydst_reinsert) continue; WARN_ON_ONCE(policy->family != family); policy->bydst_reinsert = false; hlist_for_each_entry(p, &n->hhead, bydst) { if (policy->priority > p->priority) newpos = &p->bydst; else if (policy->priority == p->priority && policy->pos > p->pos) newpos = 
&p->bydst; else break; } if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET) hlist_add_behind_rcu(&policy->bydst, newpos); else hlist_add_head_rcu(&policy->bydst, &n->hhead); /* paranoia checks follow. * Check that the reinserted policy matches at least * saddr or daddr for current node prefix. * * Matching both is fine, matching saddr in one policy * (but not daddr) and then matching only daddr in another * is a bug. */ matches_s = xfrm_policy_addr_delta(&policy->selector.saddr, &n->addr, n->prefixlen, family) == 0; matches_d = xfrm_policy_addr_delta(&policy->selector.daddr, &n->addr, n->prefixlen, family) == 0; if (matches_s && matches_d) continue; WARN_ON_ONCE(!matches_s && !matches_d); if (matches_s) matched_s++; if (matches_d) matched_d++; WARN_ON_ONCE(matched_s && matched_d); } } static void xfrm_policy_inexact_node_reinsert(struct net *net, struct xfrm_pol_inexact_node *n, struct rb_root *new, u16 family) { struct xfrm_pol_inexact_node *node; struct rb_node **p, *parent; /* we should not have another subtree here */ WARN_ON_ONCE(!RB_EMPTY_ROOT(&n->root)); restart: parent = NULL; p = &new->rb_node; while (*p) { u8 prefixlen; int delta; parent = *p; node = rb_entry(*p, struct xfrm_pol_inexact_node, node); prefixlen = min(node->prefixlen, n->prefixlen); delta = xfrm_policy_addr_delta(&n->addr, &node->addr, prefixlen, family); if (delta < 0) { p = &parent->rb_left; } else if (delta > 0) { p = &parent->rb_right; } else { bool same_prefixlen = node->prefixlen == n->prefixlen; struct xfrm_policy *tmp; hlist_for_each_entry(tmp, &n->hhead, bydst) { tmp->bydst_reinsert = true; hlist_del_rcu(&tmp->bydst); } node->prefixlen = prefixlen; xfrm_policy_inexact_list_reinsert(net, node, family); if (same_prefixlen) { kfree_rcu(n, rcu); return; } rb_erase(*p, new); kfree_rcu(n, rcu); n = node; goto restart; } } rb_link_node_rcu(&n->node, parent, p); rb_insert_color(&n->node, new); } /* merge nodes v and n */ static void xfrm_policy_inexact_node_merge(struct net *net, struct xfrm_pol_inexact_node *v, struct xfrm_pol_inexact_node *n, u16 family) { struct xfrm_pol_inexact_node *node; struct xfrm_policy *tmp; struct rb_node *rnode; /* To-be-merged node v has a subtree. * * Dismantle it and insert its nodes to n->root. */ while ((rnode = rb_first(&v->root)) != NULL) { node = rb_entry(rnode, struct xfrm_pol_inexact_node, node); rb_erase(&node->node, &v->root); xfrm_policy_inexact_node_reinsert(net, node, &n->root, family); } hlist_for_each_entry(tmp, &v->hhead, bydst) { tmp->bydst_reinsert = true; hlist_del_rcu(&tmp->bydst); } xfrm_policy_inexact_list_reinsert(net, n, family); } static struct xfrm_pol_inexact_node * xfrm_policy_inexact_insert_node(struct net *net, struct rb_root *root, xfrm_address_t *addr, u16 family, u8 prefixlen, u8 dir) { struct xfrm_pol_inexact_node *cached = NULL; struct rb_node **p, *parent = NULL; struct xfrm_pol_inexact_node *node; p = &root->rb_node; while (*p) { int delta; parent = *p; node = rb_entry(*p, struct xfrm_pol_inexact_node, node); delta = xfrm_policy_addr_delta(addr, &node->addr, node->prefixlen, family); if (delta == 0 && prefixlen >= node->prefixlen) { WARN_ON_ONCE(cached); /* ipsec policies got lost */ return node; } if (delta < 0) p = &parent->rb_left; else p = &parent->rb_right; if (prefixlen < node->prefixlen) { delta = xfrm_policy_addr_delta(addr, &node->addr, prefixlen, family); if (delta) continue; /* This node is a subnet of the new prefix. 
It needs * to be removed and re-inserted with the smaller * prefix and all nodes that are now also covered * by the reduced prefixlen. */ rb_erase(&node->node, root); if (!cached) { xfrm_pol_inexact_node_init(node, addr, prefixlen); cached = node; } else { /* This node also falls within the new * prefixlen. Merge the to-be-reinserted * node and this one. */ xfrm_policy_inexact_node_merge(net, node, cached, family); kfree_rcu(node, rcu); } /* restart */ p = &root->rb_node; parent = NULL; } } node = cached; if (!node) { node = xfrm_pol_inexact_node_alloc(addr, prefixlen); if (!node) return NULL; } rb_link_node_rcu(&node->node, parent, p); rb_insert_color(&node->node, root); return node; } static void xfrm_policy_inexact_gc_tree(struct rb_root *r, bool rm) { struct xfrm_pol_inexact_node *node; struct rb_node *rn = rb_first(r); while (rn) { node = rb_entry(rn, struct xfrm_pol_inexact_node, node); xfrm_policy_inexact_gc_tree(&node->root, rm); rn = rb_next(rn); if (!hlist_empty(&node->hhead) || !RB_EMPTY_ROOT(&node->root)) { WARN_ON_ONCE(rm); continue; } rb_erase(&node->node, r); kfree_rcu(node, rcu); } } static void __xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b, bool net_exit) { write_seqcount_begin(&b->count); xfrm_policy_inexact_gc_tree(&b->root_d, net_exit); xfrm_policy_inexact_gc_tree(&b->root_s, net_exit); write_seqcount_end(&b->count); if (!RB_EMPTY_ROOT(&b->root_d) || !RB_EMPTY_ROOT(&b->root_s) || !hlist_empty(&b->hhead)) { WARN_ON_ONCE(net_exit); return; } if (rhashtable_remove_fast(&xfrm_policy_inexact_table, &b->head, xfrm_pol_inexact_params) == 0) { list_del(&b->inexact_bins); kfree_rcu(b, rcu); } } static void xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b) { struct net *net = read_pnet(&b->k.net); spin_lock_bh(&net->xfrm.xfrm_policy_lock); __xfrm_policy_inexact_prune_bin(b, false); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); } static void __xfrm_policy_inexact_flush(struct net *net) { struct xfrm_pol_inexact_bin *bin, *t; lockdep_assert_held(&net->xfrm.xfrm_policy_lock); list_for_each_entry_safe(bin, t, &net->xfrm.inexact_bins, inexact_bins) __xfrm_policy_inexact_prune_bin(bin, false); } static struct hlist_head * xfrm_policy_inexact_alloc_chain(struct xfrm_pol_inexact_bin *bin, struct xfrm_policy *policy, u8 dir) { struct xfrm_pol_inexact_node *n; struct net *net; net = xp_net(policy); lockdep_assert_held(&net->xfrm.xfrm_policy_lock); if (xfrm_policy_inexact_insert_use_any_list(policy)) return &bin->hhead; if (xfrm_pol_inexact_addr_use_any_list(&policy->selector.daddr, policy->family, policy->selector.prefixlen_d)) { write_seqcount_begin(&bin->count); n = xfrm_policy_inexact_insert_node(net, &bin->root_s, &policy->selector.saddr, policy->family, policy->selector.prefixlen_s, dir); write_seqcount_end(&bin->count); if (!n) return NULL; return &n->hhead; } /* daddr is fixed */ write_seqcount_begin(&bin->count); n = xfrm_policy_inexact_insert_node(net, &bin->root_d, &policy->selector.daddr, policy->family, policy->selector.prefixlen_d, dir); write_seqcount_end(&bin->count); if (!n) return NULL; /* saddr is wildcard */ if (xfrm_pol_inexact_addr_use_any_list(&policy->selector.saddr, policy->family, policy->selector.prefixlen_s)) return &n->hhead; write_seqcount_begin(&bin->count); n = xfrm_policy_inexact_insert_node(net, &n->root, &policy->selector.saddr, policy->family, policy->selector.prefixlen_s, dir); write_seqcount_end(&bin->count); if (!n) return NULL; return &n->hhead; } static struct xfrm_policy * xfrm_policy_inexact_insert(struct xfrm_policy 
*policy, u8 dir, int excl) { struct xfrm_pol_inexact_bin *bin; struct xfrm_policy *delpol; struct hlist_head *chain; struct net *net; bin = xfrm_policy_inexact_alloc_bin(policy, dir); if (!bin) return ERR_PTR(-ENOMEM); net = xp_net(policy); lockdep_assert_held(&net->xfrm.xfrm_policy_lock); chain = xfrm_policy_inexact_alloc_chain(bin, policy, dir); if (!chain) { __xfrm_policy_inexact_prune_bin(bin, false); return ERR_PTR(-ENOMEM); } delpol = xfrm_policy_insert_list(chain, policy, excl); if (delpol && excl) { __xfrm_policy_inexact_prune_bin(bin, false); return ERR_PTR(-EEXIST); } if (delpol) __xfrm_policy_inexact_prune_bin(bin, false); return delpol; } static bool xfrm_policy_is_dead_or_sk(const struct xfrm_policy *policy) { int dir; if (policy->walk.dead) return true; dir = xfrm_policy_id2dir(policy->index); return dir >= XFRM_POLICY_MAX; } static void xfrm_hash_rebuild(struct work_struct *work) { struct net *net = container_of(work, struct net, xfrm.policy_hthresh.work); struct xfrm_policy *pol; struct xfrm_policy *policy; struct hlist_head *chain; struct hlist_node *newpos; int dir; unsigned seq; u8 lbits4, rbits4, lbits6, rbits6; mutex_lock(&hash_resize_mutex); /* read selector prefixlen thresholds */ do { seq = read_seqbegin(&net->xfrm.policy_hthresh.lock); lbits4 = net->xfrm.policy_hthresh.lbits4; rbits4 = net->xfrm.policy_hthresh.rbits4; lbits6 = net->xfrm.policy_hthresh.lbits6; rbits6 = net->xfrm.policy_hthresh.rbits6; } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); spin_lock_bh(&net->xfrm.xfrm_policy_lock); write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); /* make sure that we can insert the indirect policies again before * we start with destructive action. */ list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) { struct xfrm_pol_inexact_bin *bin; u8 dbits, sbits; if (xfrm_policy_is_dead_or_sk(policy)) continue; dir = xfrm_policy_id2dir(policy->index); if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) { if (policy->family == AF_INET) { dbits = rbits4; sbits = lbits4; } else { dbits = rbits6; sbits = lbits6; } } else { if (policy->family == AF_INET) { dbits = lbits4; sbits = rbits4; } else { dbits = lbits6; sbits = rbits6; } } if (policy->selector.prefixlen_d < dbits || policy->selector.prefixlen_s < sbits) continue; bin = xfrm_policy_inexact_alloc_bin(policy, dir); if (!bin) goto out_unlock; if (!xfrm_policy_inexact_alloc_chain(bin, policy, dir)) goto out_unlock; } for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) { /* dir out => dst = remote, src = local */ net->xfrm.policy_bydst[dir].dbits4 = rbits4; net->xfrm.policy_bydst[dir].sbits4 = lbits4; net->xfrm.policy_bydst[dir].dbits6 = rbits6; net->xfrm.policy_bydst[dir].sbits6 = lbits6; } else { /* dir in/fwd => dst = local, src = remote */ net->xfrm.policy_bydst[dir].dbits4 = lbits4; net->xfrm.policy_bydst[dir].sbits4 = rbits4; net->xfrm.policy_bydst[dir].dbits6 = lbits6; net->xfrm.policy_bydst[dir].sbits6 = rbits6; } } /* re-insert all policies by order of creation */ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { if (xfrm_policy_is_dead_or_sk(policy)) continue; hlist_del_rcu(&policy->bydst); newpos = NULL; dir = xfrm_policy_id2dir(policy->index); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); if (!chain) { void *p = xfrm_policy_inexact_insert(policy, dir, 0); WARN_ONCE(IS_ERR(p), "reinsert: %ld\n", PTR_ERR(p)); continue; } hlist_for_each_entry(pol, chain, bydst) { if (policy->priority >= pol->priority) 
newpos = &pol->bydst; else break; } if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET) hlist_add_behind_rcu(&policy->bydst, newpos); else hlist_add_head_rcu(&policy->bydst, chain); } out_unlock: __xfrm_policy_inexact_flush(net); write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); mutex_unlock(&hash_resize_mutex); } void xfrm_policy_hash_rebuild(struct net *net) { schedule_work(&net->xfrm.policy_hthresh.work); } EXPORT_SYMBOL(xfrm_policy_hash_rebuild); /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ static u32 xfrm_gen_index(struct net *net, int dir, u32 index) { for (;;) { struct hlist_head *list; struct xfrm_policy *p; u32 idx; int found; if (!index) { idx = (net->xfrm.idx_generator | dir); net->xfrm.idx_generator += 8; } else { idx = index; index = 0; } if (idx == 0) idx = 8; list = net->xfrm.policy_byidx + idx_hash(net, idx); found = 0; hlist_for_each_entry(p, list, byidx) { if (p->index == idx) { found = 1; break; } } if (!found) return idx; } } static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2) { u32 *p1 = (u32 *) s1; u32 *p2 = (u32 *) s2; int len = sizeof(struct xfrm_selector) / sizeof(u32); int i; for (i = 0; i < len; i++) { if (p1[i] != p2[i]) return 1; } return 0; } static void xfrm_policy_requeue(struct xfrm_policy *old, struct xfrm_policy *new) { struct xfrm_policy_queue *pq = &old->polq; struct sk_buff_head list; if (skb_queue_empty(&pq->hold_queue)) return; __skb_queue_head_init(&list); spin_lock_bh(&pq->hold_queue.lock); skb_queue_splice_init(&pq->hold_queue, &list); if (timer_delete(&pq->hold_timer)) xfrm_pol_put(old); spin_unlock_bh(&pq->hold_queue.lock); pq = &new->polq; spin_lock_bh(&pq->hold_queue.lock); skb_queue_splice(&list, &pq->hold_queue); pq->timeout = XFRM_QUEUE_TMO_MIN; if (!mod_timer(&pq->hold_timer, jiffies)) xfrm_pol_hold(new); spin_unlock_bh(&pq->hold_queue.lock); } static inline bool xfrm_policy_mark_match(const struct xfrm_mark *mark, struct xfrm_policy *pol) { return mark->v == pol->mark.v && mark->m == pol->mark.m; } static u32 xfrm_pol_bin_key(const void *data, u32 len, u32 seed) { const struct xfrm_pol_inexact_key *k = data; u32 a = k->type << 24 | k->dir << 16 | k->family; return jhash_3words(a, k->if_id, net_hash_mix(read_pnet(&k->net)), seed); } static u32 xfrm_pol_bin_obj(const void *data, u32 len, u32 seed) { const struct xfrm_pol_inexact_bin *b = data; return xfrm_pol_bin_key(&b->k, 0, seed); } static int xfrm_pol_bin_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { const struct xfrm_pol_inexact_key *key = arg->key; const struct xfrm_pol_inexact_bin *b = ptr; int ret; if (!net_eq(read_pnet(&b->k.net), read_pnet(&key->net))) return -1; ret = b->k.dir ^ key->dir; if (ret) return ret; ret = b->k.type ^ key->type; if (ret) return ret; ret = b->k.family ^ key->family; if (ret) return ret; return b->k.if_id ^ key->if_id; } static const struct rhashtable_params xfrm_pol_inexact_params = { .head_offset = offsetof(struct xfrm_pol_inexact_bin, head), .hashfn = xfrm_pol_bin_key, .obj_hashfn = xfrm_pol_bin_obj, .obj_cmpfn = xfrm_pol_bin_cmp, .automatic_shrinking = true, }; static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy, bool excl) { struct xfrm_policy *pol, *newpos = NULL, *delpol = NULL; hlist_for_each_entry(pol, chain, bydst) { if (pol->type == policy->type && pol->if_id == policy->if_id && 
!selector_cmp(&pol->selector, &policy->selector) && xfrm_policy_mark_match(&policy->mark, pol) && xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) return ERR_PTR(-EEXIST); delpol = pol; if (policy->priority > pol->priority) continue; } else if (policy->priority >= pol->priority) { newpos = pol; continue; } if (delpol) break; } if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET) hlist_add_behind_rcu(&policy->bydst, &newpos->bydst); else /* Packet offload policies enter to the head * to speed-up lookups. */ hlist_add_head_rcu(&policy->bydst, chain); return delpol; } int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) { struct net *net = xp_net(policy); struct xfrm_policy *delpol; struct hlist_head *chain; /* Sanitize mark before store */ policy->mark.v &= policy->mark.m; spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); if (chain) delpol = xfrm_policy_insert_list(chain, policy, excl); else delpol = xfrm_policy_inexact_insert(policy, dir, excl); if (IS_ERR(delpol)) { spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return PTR_ERR(delpol); } __xfrm_policy_link(policy, dir); /* After previous checking, family can either be AF_INET or AF_INET6 */ if (policy->family == AF_INET) rt_genid_bump_ipv4(net); else rt_genid_bump_ipv6(net); if (delpol) { xfrm_policy_requeue(delpol, policy); __xfrm_policy_unlink(delpol, dir); } policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index); hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); policy->curlft.add_time = ktime_get_real_seconds(); policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (delpol) xfrm_policy_kill(delpol); else if (xfrm_bydst_should_resize(net, dir, NULL)) schedule_work(&net->xfrm.policy_hash_work); return 0; } EXPORT_SYMBOL(xfrm_policy_insert); static struct xfrm_policy * __xfrm_policy_bysel_ctx(struct hlist_head *chain, const struct xfrm_mark *mark, u32 if_id, u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx) { struct xfrm_policy *pol; if (!chain) return NULL; hlist_for_each_entry(pol, chain, bydst) { if (pol->type == type && pol->if_id == if_id && xfrm_policy_mark_match(mark, pol) && !selector_cmp(sel, &pol->selector) && xfrm_sec_ctx_match(ctx, pol->security)) return pol; } return NULL; } struct xfrm_policy * xfrm_policy_bysel_ctx(struct net *net, const struct xfrm_mark *mark, u32 if_id, u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err) { struct xfrm_pol_inexact_bin *bin = NULL; struct xfrm_policy *pol, *ret = NULL; struct hlist_head *chain; *err = 0; spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, sel, sel->family, dir); if (!chain) { struct xfrm_pol_inexact_candidates cand; int i; bin = xfrm_policy_inexact_lookup(net, type, sel->family, dir, if_id); if (!bin) { spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return NULL; } if (!xfrm_policy_find_inexact_candidates(&cand, bin, &sel->saddr, &sel->daddr)) { spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return NULL; } pol = NULL; for (i = 0; i < ARRAY_SIZE(cand.res); i++) { struct xfrm_policy *tmp; tmp = __xfrm_policy_bysel_ctx(cand.res[i], mark, if_id, type, dir, sel, ctx); if (!tmp) continue; if (!pol || tmp->pos < pol->pos) pol = tmp; } } else { pol = __xfrm_policy_bysel_ctx(chain, mark, if_id, type, dir, sel, ctx); } if (pol) { 
xfrm_pol_hold(pol); if (delete) { *err = security_xfrm_policy_delete(pol->security); if (*err) { spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); } ret = pol; } spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); if (bin && delete) xfrm_policy_inexact_prune_bin(bin); return ret; } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); struct xfrm_policy * xfrm_policy_byid(struct net *net, const struct xfrm_mark *mark, u32 if_id, u8 type, int dir, u32 id, int delete, int *err) { struct xfrm_policy *pol, *ret; struct hlist_head *chain; *err = -ENOENT; if (xfrm_policy_id2dir(id) != dir) return NULL; *err = 0; spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; hlist_for_each_entry(pol, chain, byidx) { if (pol->type == type && pol->index == id && pol->if_id == if_id && xfrm_policy_mark_match(mark, pol)) { xfrm_pol_hold(pol); if (delete) { *err = security_xfrm_policy_delete( pol->security); if (*err) { spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); } ret = pol; break; } } spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); return ret; } EXPORT_SYMBOL(xfrm_policy_byid); #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) { struct xfrm_policy *pol; int err = 0; list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) { if (pol->walk.dead || xfrm_policy_id2dir(pol->index) >= XFRM_POLICY_MAX || pol->type != type) continue; err = security_xfrm_policy_delete(pol->security); if (err) { xfrm_audit_policy_delete(pol, 0, task_valid); return err; } } return err; } static inline int xfrm_dev_policy_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid) { struct xfrm_policy *pol; int err = 0; list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) { if (pol->walk.dead || xfrm_policy_id2dir(pol->index) >= XFRM_POLICY_MAX || pol->xdo.dev != dev) continue; err = security_xfrm_policy_delete(pol->security); if (err) { xfrm_audit_policy_delete(pol, 0, task_valid); return err; } } return err; } #else static inline int xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) { return 0; } static inline int xfrm_dev_policy_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid) { return 0; } #endif int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) { int dir, err = 0, cnt = 0; struct xfrm_policy *pol; spin_lock_bh(&net->xfrm.xfrm_policy_lock); err = xfrm_policy_flush_secctx_check(net, type, task_valid); if (err) goto out; again: list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) { if (pol->walk.dead) continue; dir = xfrm_policy_id2dir(pol->index); if (dir >= XFRM_POLICY_MAX || pol->type != type) continue; __xfrm_policy_unlink(pol, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); spin_lock_bh(&net->xfrm.xfrm_policy_lock); goto again; } if (cnt) __xfrm_policy_inexact_flush(net); else err = -ESRCH; out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; } EXPORT_SYMBOL(xfrm_policy_flush); int xfrm_dev_policy_flush(struct net *net, struct net_device *dev, bool task_valid) { int dir, err = 0, cnt = 0; struct xfrm_policy *pol; spin_lock_bh(&net->xfrm.xfrm_policy_lock); err = xfrm_dev_policy_flush_secctx_check(net, dev, task_valid); if (err) goto out; again: list_for_each_entry(pol, 
&net->xfrm.policy_all, walk.all) { if (pol->walk.dead) continue; dir = xfrm_policy_id2dir(pol->index); if (dir >= XFRM_POLICY_MAX || pol->xdo.dev != dev) continue; __xfrm_policy_unlink(pol, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); spin_lock_bh(&net->xfrm.xfrm_policy_lock); goto again; } if (cnt) __xfrm_policy_inexact_flush(net); else err = -ESRCH; out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; } EXPORT_SYMBOL(xfrm_dev_policy_flush); int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { struct xfrm_policy *pol; struct xfrm_policy_walk_entry *x; int error = 0; if (walk->type >= XFRM_POLICY_TYPE_MAX && walk->type != XFRM_POLICY_TYPE_ANY) return -EINVAL; if (list_empty(&walk->walk.all) && walk->seq != 0) return 0; spin_lock_bh(&net->xfrm.xfrm_policy_lock); if (list_empty(&walk->walk.all)) x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); else x = list_first_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); list_for_each_entry_from(x, &net->xfrm.policy_all, all) { if (x->dead) continue; pol = container_of(x, struct xfrm_policy, walk); if (walk->type != XFRM_POLICY_TYPE_ANY && walk->type != pol->type) continue; error = func(pol, xfrm_policy_id2dir(pol->index), walk->seq, data); if (error) { list_move_tail(&walk->walk.all, &x->all); goto out; } walk->seq++; } if (walk->seq == 0) { error = -ENOENT; goto out; } list_del_init(&walk->walk.all); out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) { INIT_LIST_HEAD(&walk->walk.all); walk->walk.dead = 1; walk->type = type; walk->seq = 0; } EXPORT_SYMBOL(xfrm_policy_walk_init); void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net) { if (list_empty(&walk->walk.all)) return; spin_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */ list_del(&walk->walk.all); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_policy_walk_done); /* * Find policy to apply to this flow. * * Returns 0 if policy found, else an -errno. 
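 *
 * The mark check in xfrm_policy_match() below compares only the masked
 * bits.  A policy with mark.v = 0x10 and mark.m = 0xf0 (values chosen
 * purely for illustration) matches any flow whose flowi_mark has 0x1 in
 * bits 4-7:
 *
 *	(0x4213 & 0xf0) == 0x10		-> match
 *	(0x0023 & 0xf0) == 0x20		-> no match
 *
 * xfrm_policy_insert() sanitized mark.v with "mark.v &= mark.m" before the
 * policy was stored, so comparing against the raw v is sufficient.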
*/ static int xfrm_policy_match(const struct xfrm_policy *pol, const struct flowi *fl, u8 type, u16 family, u32 if_id) { const struct xfrm_selector *sel = &pol->selector; int ret = -ESRCH; bool match; if (pol->family != family || pol->if_id != if_id || (fl->flowi_mark & pol->mark.m) != pol->mark.v || pol->type != type) return ret; match = xfrm_selector_match(sel, fl, family); if (match) ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid); return ret; } static struct xfrm_pol_inexact_node * xfrm_policy_lookup_inexact_addr(const struct rb_root *r, seqcount_spinlock_t *count, const xfrm_address_t *addr, u16 family) { const struct rb_node *parent; int seq; again: seq = read_seqcount_begin(count); parent = rcu_dereference_raw(r->rb_node); while (parent) { struct xfrm_pol_inexact_node *node; int delta; node = rb_entry(parent, struct xfrm_pol_inexact_node, node); delta = xfrm_policy_addr_delta(addr, &node->addr, node->prefixlen, family); if (delta < 0) { parent = rcu_dereference_raw(parent->rb_left); continue; } else if (delta > 0) { parent = rcu_dereference_raw(parent->rb_right); continue; } return node; } if (read_seqcount_retry(count, seq)) goto again; return NULL; } static bool xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand, struct xfrm_pol_inexact_bin *b, const xfrm_address_t *saddr, const xfrm_address_t *daddr) { struct xfrm_pol_inexact_node *n; u16 family; if (!b) return false; family = b->k.family; memset(cand, 0, sizeof(*cand)); cand->res[XFRM_POL_CAND_ANY] = &b->hhead; n = xfrm_policy_lookup_inexact_addr(&b->root_d, &b->count, daddr, family); if (n) { cand->res[XFRM_POL_CAND_DADDR] = &n->hhead; n = xfrm_policy_lookup_inexact_addr(&n->root, &b->count, saddr, family); if (n) cand->res[XFRM_POL_CAND_BOTH] = &n->hhead; } n = xfrm_policy_lookup_inexact_addr(&b->root_s, &b->count, saddr, family); if (n) cand->res[XFRM_POL_CAND_SADDR] = &n->hhead; return true; } static struct xfrm_pol_inexact_bin * xfrm_policy_inexact_lookup_rcu(struct net *net, u8 type, u16 family, u8 dir, u32 if_id) { struct xfrm_pol_inexact_key k = { .family = family, .type = type, .dir = dir, .if_id = if_id, }; write_pnet(&k.net, net); return rhashtable_lookup(&xfrm_policy_inexact_table, &k, xfrm_pol_inexact_params); } static struct xfrm_pol_inexact_bin * xfrm_policy_inexact_lookup(struct net *net, u8 type, u16 family, u8 dir, u32 if_id) { struct xfrm_pol_inexact_bin *bin; lockdep_assert_held(&net->xfrm.xfrm_policy_lock); rcu_read_lock(); bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id); rcu_read_unlock(); return bin; } static struct xfrm_policy * __xfrm_policy_eval_candidates(struct hlist_head *chain, struct xfrm_policy *prefer, const struct flowi *fl, u8 type, u16 family, u32 if_id) { u32 priority = prefer ? prefer->priority : ~0u; struct xfrm_policy *pol; if (!chain) return NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { int err; if (pol->priority > priority) break; err = xfrm_policy_match(pol, fl, type, family, if_id); if (err) { if (err != -ESRCH) return ERR_PTR(err); continue; } if (prefer) { /* matches. Is it older than *prefer? 
*/ if (pol->priority == priority && prefer->pos < pol->pos) return prefer; } return pol; } return NULL; } static struct xfrm_policy * xfrm_policy_eval_candidates(struct xfrm_pol_inexact_candidates *cand, struct xfrm_policy *prefer, const struct flowi *fl, u8 type, u16 family, u32 if_id) { struct xfrm_policy *tmp; int i; for (i = 0; i < ARRAY_SIZE(cand->res); i++) { tmp = __xfrm_policy_eval_candidates(cand->res[i], prefer, fl, type, family, if_id); if (!tmp) continue; if (IS_ERR(tmp)) return tmp; prefer = tmp; } return prefer; } static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, const struct flowi *fl, u16 family, u8 dir, u32 if_id) { struct xfrm_pol_inexact_candidates cand; const xfrm_address_t *daddr, *saddr; struct xfrm_pol_inexact_bin *bin; struct xfrm_policy *pol, *ret; struct hlist_head *chain; unsigned int sequence; int err; daddr = xfrm_flowi_daddr(fl, family); saddr = xfrm_flowi_saddr(fl, family); if (unlikely(!daddr || !saddr)) return NULL; rcu_read_lock(); retry: do { sequence = read_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); chain = policy_hash_direct(net, daddr, saddr, family, dir); } while (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence)); ret = NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, if_id); if (err) { if (err == -ESRCH) continue; else { ret = ERR_PTR(err); goto fail; } } else { ret = pol; break; } } if (ret && ret->xdo.type == XFRM_DEV_OFFLOAD_PACKET) goto skip_inexact; bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id); if (!bin || !xfrm_policy_find_inexact_candidates(&cand, bin, saddr, daddr)) goto skip_inexact; pol = xfrm_policy_eval_candidates(&cand, ret, fl, type, family, if_id); if (pol) { ret = pol; if (IS_ERR(pol)) goto fail; } skip_inexact: if (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence)) goto retry; if (ret && !xfrm_pol_hold_rcu(ret)) goto retry; fail: rcu_read_unlock(); return ret; } static struct xfrm_policy *xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, u32 if_id) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_policy *pol; pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir, if_id); if (pol != NULL) return pol; #endif return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir, if_id); } static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, const struct flowi *fl, u16 family, u32 if_id) { struct xfrm_policy *pol; rcu_read_lock(); again: pol = rcu_dereference(sk->sk_policy[dir]); if (pol != NULL) { bool match; int err = 0; if (pol->family != family) { pol = NULL; goto out; } match = xfrm_selector_match(&pol->selector, fl, family); if (match) { if ((READ_ONCE(sk->sk_mark) & pol->mark.m) != pol->mark.v || pol->if_id != if_id) { pol = NULL; goto out; } err = security_xfrm_policy_lookup(pol->security, fl->flowi_secid); if (!err) { if (!xfrm_pol_hold_rcu(pol)) goto again; } else if (err == -ESRCH) { pol = NULL; } else { pol = ERR_PTR(err); } } else pol = NULL; } out: rcu_read_unlock(); return pol; } static u32 xfrm_gen_pos_slow(struct net *net) { struct xfrm_policy *policy; u32 i = 0; /* oldest entry is last in list */ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { if (!xfrm_policy_is_dead_or_sk(policy)) policy->pos = ++i; } return i; } static u32 xfrm_gen_pos(struct net *net) { const struct xfrm_policy *policy; u32 i = 0; /* most recently added policy is at the head of the list */ 
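/* Take the first live entry's pos + 1 for the new policy; if that counter has hit UINT_MAX, renumber everything via xfrm_gen_pos_slow() instead. */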
list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) { if (xfrm_policy_is_dead_or_sk(policy)) continue; if (policy->pos == UINT_MAX) return xfrm_gen_pos_slow(net); i = policy->pos + 1; break; } return i; } static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { struct net *net = xp_net(pol); switch (dir) { case XFRM_POLICY_IN: case XFRM_POLICY_FWD: case XFRM_POLICY_OUT: pol->pos = xfrm_gen_pos(net); break; } list_add(&pol->walk.all, &net->xfrm.policy_all); net->xfrm.policy_count[dir]++; xfrm_pol_hold(pol); } static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir) { struct net *net = xp_net(pol); if (list_empty(&pol->walk.all)) return NULL; /* Socket policies are not hashed. */ if (!hlist_unhashed(&pol->bydst)) { hlist_del_rcu(&pol->bydst); hlist_del(&pol->byidx); } list_del_init(&pol->walk.all); net->xfrm.policy_count[dir]--; return pol; } static void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir) { __xfrm_policy_link(pol, XFRM_POLICY_MAX + dir); } static void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir) { __xfrm_policy_unlink(pol, XFRM_POLICY_MAX + dir); } int xfrm_policy_delete(struct xfrm_policy *pol, int dir) { struct net *net = xp_net(pol); spin_lock_bh(&net->xfrm.xfrm_policy_lock); pol = __xfrm_policy_unlink(pol, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (pol) { xfrm_policy_kill(pol); return 0; } return -ENOENT; } EXPORT_SYMBOL(xfrm_policy_delete); int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { struct net *net = sock_net(sk); struct xfrm_policy *old_pol; #ifdef CONFIG_XFRM_SUB_POLICY if (pol && pol->type != XFRM_POLICY_TYPE_MAIN) return -EINVAL; #endif spin_lock_bh(&net->xfrm.xfrm_policy_lock); old_pol = rcu_dereference_protected(sk->sk_policy[dir], lockdep_is_held(&net->xfrm.xfrm_policy_lock)); if (pol) { pol->curlft.add_time = ktime_get_real_seconds(); pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0); xfrm_sk_policy_link(pol, dir); } rcu_assign_pointer(sk->sk_policy[dir], pol); if (old_pol) { if (pol) xfrm_policy_requeue(old_pol, pol); /* Unlinking succeeds always. This is the only function * allowed to delete or replace socket policy. 
*/ xfrm_sk_policy_unlink(old_pol, dir); } spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (old_pol) { xfrm_policy_kill(old_pol); } return 0; } static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) { struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); struct net *net = xp_net(old); if (newp) { newp->selector = old->selector; if (security_xfrm_policy_clone(old->security, &newp->security)) { kfree(newp); return NULL; /* ENOMEM */ } newp->lft = old->lft; newp->curlft = old->curlft; newp->mark = old->mark; newp->if_id = old->if_id; newp->action = old->action; newp->flags = old->flags; newp->xfrm_nr = old->xfrm_nr; newp->index = old->index; newp->type = old->type; newp->family = old->family; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); spin_lock_bh(&net->xfrm.xfrm_policy_lock); xfrm_sk_policy_link(newp, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_pol_put(newp); } return newp; } int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { const struct xfrm_policy *p; struct xfrm_policy *np; int i, ret = 0; rcu_read_lock(); for (i = 0; i < 2; i++) { p = rcu_dereference(osk->sk_policy[i]); if (p) { np = clone_policy(p, i); if (unlikely(!np)) { ret = -ENOMEM; break; } rcu_assign_pointer(sk->sk_policy[i], np); } } rcu_read_unlock(); return ret; } static int xfrm_get_saddr(unsigned short family, xfrm_address_t *saddr, const struct xfrm_dst_lookup_params *params) { int err; const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EINVAL; err = afinfo->get_saddr(saddr, params); rcu_read_unlock(); return err; } /* Resolve list of templates for the flow, given policy. */ static int xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, struct xfrm_state **xfrm, unsigned short family) { struct net *net = xp_net(policy); int nx; int i, error; xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); xfrm_address_t tmp; for (nx = 0, i = 0; i < policy->xfrm_nr; i++) { struct xfrm_state *x; xfrm_address_t *remote = daddr; xfrm_address_t *local = saddr; struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; if (tmpl->mode == XFRM_MODE_TUNNEL || tmpl->mode == XFRM_MODE_IPTFS || tmpl->mode == XFRM_MODE_BEET) { remote = &tmpl->id.daddr; local = &tmpl->saddr; if (xfrm_addr_any(local, tmpl->encap_family)) { struct xfrm_dst_lookup_params params; memset(&params, 0, sizeof(params)); params.net = net; params.oif = fl->flowi_oif; params.daddr = remote; error = xfrm_get_saddr(tmpl->encap_family, &tmp, &params); if (error) goto fail; local = &tmp; } } x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family, policy->if_id); if (x && x->dir && x->dir != XFRM_SA_DIR_OUT) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEDIRERROR); xfrm_state_put(x); error = -EINVAL; goto fail; } if (x && x->km.state == XFRM_STATE_VALID) { xfrm[nx++] = x; daddr = remote; saddr = local; continue; } if (x) { error = (x->km.state == XFRM_STATE_ERROR ? -EINVAL : -EAGAIN); xfrm_state_put(x); } else if (error == -ESRCH) { error = -EAGAIN; } if (!tmpl->optional) goto fail; } return nx; fail: for (nx--; nx >= 0; nx--) xfrm_state_put(xfrm[nx]); return error; } static int xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, struct xfrm_state **xfrm, unsigned short family) { struct xfrm_state *tp[XFRM_MAX_DEPTH]; struct xfrm_state **tpp = (npols > 1) ? 
tp : xfrm; int cnx = 0; int error; int ret; int i; for (i = 0; i < npols; i++) { if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) { error = -ENOBUFS; goto fail; } ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family); if (ret < 0) { error = ret; goto fail; } else cnx += ret; } /* found states are sorted for outbound processing */ if (npols > 1) xfrm_state_sort(xfrm, tpp, cnx, family); return cnx; fail: for (cnx--; cnx >= 0; cnx--) xfrm_state_put(tpp[cnx]); return error; } static dscp_t xfrm_get_dscp(const struct flowi *fl, int family) { if (family == AF_INET) return fl->u.ip4.flowi4_dscp; return 0; } static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); struct dst_ops *dst_ops; struct xfrm_dst *xdst; if (!afinfo) return ERR_PTR(-EINVAL); switch (family) { case AF_INET: dst_ops = &net->xfrm.xfrm4_dst_ops; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: dst_ops = &net->xfrm.xfrm6_dst_ops; break; #endif default: BUG(); } xdst = dst_alloc(dst_ops, NULL, DST_OBSOLETE_NONE, 0); if (likely(xdst)) { memset_after(xdst, 0, u.dst); } else xdst = ERR_PTR(-ENOBUFS); rcu_read_unlock(); return xdst; } static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { if (dst->ops->family == AF_INET6) { path->path_cookie = rt6_get_cookie(dst_rt6_info(dst)); path->u.rt6.rt6i_nfheader_len = nfheader_len; } } static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(xdst->u.dst.ops->family); int err; if (!afinfo) return -EINVAL; err = afinfo->fill_dst(xdst, dev, fl); rcu_read_unlock(); return err; } /* Allocate chain of dst_entry's, attach known xfrm's, calculate * all the metrics... Shortly, bundle a bundle. */ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, struct xfrm_dst **bundle, int nx, const struct flowi *fl, struct dst_entry *dst) { const struct xfrm_state_afinfo *afinfo; const struct xfrm_mode *inner_mode; struct net *net = xp_net(policy); unsigned long now = jiffies; struct net_device *dev; struct xfrm_dst *xdst_prev = NULL; struct xfrm_dst *xdst0 = NULL; int i = 0; int err; int header_len = 0; int nfheader_len = 0; int trailer_len = 0; int family = policy->selector.family; xfrm_address_t saddr, daddr; dscp_t dscp; xfrm_flowi_addr_get(fl, &saddr, &daddr, family); dscp = xfrm_get_dscp(fl, family); dst_hold(dst); for (; i < nx; i++) { struct xfrm_dst *xdst = xfrm_alloc_dst(net, family); struct dst_entry *dst1 = &xdst->u.dst; err = PTR_ERR(xdst); if (IS_ERR(xdst)) { dst_release(dst); goto put_states; } bundle[i] = xdst; if (!xdst_prev) xdst0 = xdst; else /* Ref count is taken during xfrm_alloc_dst() * No need to do dst_clone() on dst1 */ xfrm_dst_set_child(xdst_prev, &xdst->u.dst); if (xfrm[i]->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(xfrm[i], xfrm_af2proto(family)); if (!inner_mode) { err = -EAFNOSUPPORT; dst_release(dst); goto put_states; } } else inner_mode = &xfrm[i]->inner_mode; xdst->route = dst; dst_copy_metrics(dst1, dst); if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { __u32 mark = 0; int oif; if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m) mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]); if (xfrm[i]->xso.type != XFRM_DEV_OFFLOAD_PACKET) family = xfrm[i]->props.family; oif = fl->flowi_oif ? 
: fl->flowi_l3mdev; dst = xfrm_dst_lookup(xfrm[i], dscp, oif, &saddr, &daddr, family, mark); err = PTR_ERR(dst); if (IS_ERR(dst)) goto put_states; } else dst_hold(dst); dst1->xfrm = xfrm[i]; xdst->xfrm_genid = xfrm[i]->genid; dst1->obsolete = DST_OBSOLETE_FORCE_CHK; dst1->lastuse = now; dst1->input = dst_discard; if (xfrm[i]->mode_cbs && xfrm[i]->mode_cbs->output) { dst1->output = xfrm[i]->mode_cbs->output; } else { rcu_read_lock(); afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); if (likely(afinfo)) dst1->output = afinfo->output; else dst1->output = dst_discard_out; rcu_read_unlock(); } xdst_prev = xdst; header_len += xfrm[i]->props.header_len; if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT) nfheader_len += xfrm[i]->props.header_len; trailer_len += xfrm[i]->props.trailer_len; } xfrm_dst_set_child(xdst_prev, dst); xdst0->path = dst; err = -ENODEV; dev = dst->dev; if (!dev) goto free_dst; xfrm_init_path(xdst0, dst, nfheader_len); xfrm_init_pmtu(bundle, nx); for (xdst_prev = xdst0; xdst_prev != (struct xfrm_dst *)dst; xdst_prev = (struct xfrm_dst *) xfrm_dst_child(&xdst_prev->u.dst)) { err = xfrm_fill_dst(xdst_prev, dev, fl); if (err) goto free_dst; xdst_prev->u.dst.header_len = header_len; xdst_prev->u.dst.trailer_len = trailer_len; header_len -= xdst_prev->u.dst.xfrm->props.header_len; trailer_len -= xdst_prev->u.dst.xfrm->props.trailer_len; } return &xdst0->u.dst; put_states: for (; i < nx; i++) xfrm_state_put(xfrm[i]); free_dst: if (xdst0) dst_release_immediate(&xdst0->u.dst); return ERR_PTR(err); } static int xfrm_expand_policies(const struct flowi *fl, u16 family, struct xfrm_policy **pols, int *num_pols, int *num_xfrms) { int i; if (*num_pols == 0 || !pols[0]) { *num_pols = 0; *num_xfrms = 0; return 0; } if (IS_ERR(pols[0])) { *num_pols = 0; return PTR_ERR(pols[0]); } *num_xfrms = pols[0]->xfrm_nr; #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->action == XFRM_POLICY_ALLOW && pols[0]->type != XFRM_POLICY_TYPE_MAIN) { pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]), XFRM_POLICY_TYPE_MAIN, fl, family, XFRM_POLICY_OUT, pols[0]->if_id); if (pols[1]) { if (IS_ERR(pols[1])) { xfrm_pols_put(pols, *num_pols); *num_pols = 0; return PTR_ERR(pols[1]); } (*num_pols)++; (*num_xfrms) += pols[1]->xfrm_nr; } } #endif for (i = 0; i < *num_pols; i++) { if (pols[i]->action != XFRM_POLICY_ALLOW) { *num_xfrms = -1; break; } } return 0; } static struct xfrm_dst * xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, const struct flowi *fl, u16 family, struct dst_entry *dst_orig) { struct net *net = xp_net(pols[0]); struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; struct xfrm_dst *bundle[XFRM_MAX_DEPTH]; struct xfrm_dst *xdst; struct dst_entry *dst; int err; /* Try to instantiate a bundle */ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); if (err <= 0) { if (err == 0) return NULL; if (err != -EAGAIN) XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); return ERR_PTR(err); } dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig); if (IS_ERR(dst)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); return ERR_CAST(dst); } xdst = (struct xfrm_dst *)dst; xdst->num_xfrms = err; xdst->num_pols = num_pols; memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); xdst->policy_genid = atomic_read(&pols[0]->genid); return xdst; } static void xfrm_policy_queue_process(struct timer_list *t) { struct sk_buff *skb; struct sock *sk; struct dst_entry *dst; struct xfrm_policy *pol = timer_container_of(pol, t, polq.hold_timer); struct net *net = xp_net(pol); struct 
xfrm_policy_queue *pq = &pol->polq; struct flowi fl; struct sk_buff_head list; __u32 skb_mark; spin_lock(&pq->hold_queue.lock); skb = skb_peek(&pq->hold_queue); if (!skb) { spin_unlock(&pq->hold_queue.lock); goto out; } dst = skb_dst(skb); sk = skb->sk; /* Fixup the mark to support VTI. */ skb_mark = skb->mark; skb->mark = pol->mark.v; xfrm_decode_session(net, skb, &fl, dst->ops->family); skb->mark = skb_mark; spin_unlock(&pq->hold_queue.lock); dst_hold(xfrm_dst_path(dst)); dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) goto purge_queue; if (dst->flags & DST_XFRM_QUEUE) { dst_release(dst); if (pq->timeout >= XFRM_QUEUE_TMO_MAX) goto purge_queue; pq->timeout = pq->timeout << 1; if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout)) xfrm_pol_hold(pol); goto out; } dst_release(dst); __skb_queue_head_init(&list); spin_lock(&pq->hold_queue.lock); pq->timeout = 0; skb_queue_splice_init(&pq->hold_queue, &list); spin_unlock(&pq->hold_queue.lock); while (!skb_queue_empty(&list)) { skb = __skb_dequeue(&list); /* Fixup the mark to support VTI. */ skb_mark = skb->mark; skb->mark = pol->mark.v; xfrm_decode_session(net, skb, &fl, skb_dst(skb)->ops->family); skb->mark = skb_mark; dst_hold(xfrm_dst_path(skb_dst(skb))); dst = xfrm_lookup(net, xfrm_dst_path(skb_dst(skb)), &fl, skb->sk, 0); if (IS_ERR(dst)) { kfree_skb(skb); continue; } nf_reset_ct(skb); skb_dst_drop(skb); skb_dst_set(skb, dst); dst_output(net, skb_to_full_sk(skb), skb); } out: xfrm_pol_put(pol); return; purge_queue: pq->timeout = 0; skb_queue_purge(&pq->hold_queue); xfrm_pol_put(pol); } static int xdst_queue_output(struct net *net, struct sock *sk, struct sk_buff *skb) { unsigned long sched_next; struct dst_entry *dst = skb_dst(skb); struct xfrm_dst *xdst = (struct xfrm_dst *) dst; struct xfrm_policy *pol = xdst->pols[0]; struct xfrm_policy_queue *pq = &pol->polq; if (unlikely(skb_fclone_busy(sk, skb))) { kfree_skb(skb); return 0; } if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) { kfree_skb(skb); return -EAGAIN; } skb_dst_force(skb); spin_lock_bh(&pq->hold_queue.lock); if (!pq->timeout) pq->timeout = XFRM_QUEUE_TMO_MIN; sched_next = jiffies + pq->timeout; if (timer_delete(&pq->hold_timer)) { if (time_before(pq->hold_timer.expires, sched_next)) sched_next = pq->hold_timer.expires; xfrm_pol_put(pol); } __skb_queue_tail(&pq->hold_queue, skb); if (!mod_timer(&pq->hold_timer, sched_next)) xfrm_pol_hold(pol); spin_unlock_bh(&pq->hold_queue.lock); return 0; } static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, struct xfrm_flo *xflo, const struct flowi *fl, int num_xfrms, u16 family) { int err; struct net_device *dev; struct dst_entry *dst; struct dst_entry *dst1; struct xfrm_dst *xdst; xdst = xfrm_alloc_dst(net, family); if (IS_ERR(xdst)) return xdst; if (!(xflo->flags & XFRM_LOOKUP_QUEUE) || net->xfrm.sysctl_larval_drop || num_xfrms <= 0) return xdst; dst = xflo->dst_orig; dst1 = &xdst->u.dst; dst_hold(dst); xdst->route = dst; dst_copy_metrics(dst1, dst); dst1->obsolete = DST_OBSOLETE_FORCE_CHK; dst1->flags |= DST_XFRM_QUEUE; dst1->lastuse = jiffies; dst1->input = dst_discard; dst1->output = xdst_queue_output; dst_hold(dst); xfrm_dst_set_child(xdst, dst); xdst->path = dst; xfrm_init_path((struct xfrm_dst *)dst1, dst, 0); err = -ENODEV; dev = dst->dev; if (!dev) goto free_dst; err = xfrm_fill_dst(xdst, dev, fl); if (err) goto free_dst; out: return xdst; free_dst: dst_release(dst1); xdst = ERR_PTR(err); goto out; } static struct xfrm_dst *xfrm_bundle_lookup(struct net *net, const struct flowi 
*fl, u16 family, u8 dir, struct xfrm_flo *xflo, u32 if_id) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols = 0, num_xfrms = 0, err; struct xfrm_dst *xdst; /* Resolve policies to use if we couldn't get them from * previous cache entry */ num_pols = 1; pols[0] = xfrm_policy_lookup(net, fl, family, dir, if_id); err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); if (err < 0) goto inc_error; if (num_pols == 0) return NULL; if (num_xfrms <= 0) goto make_dummy_bundle; xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, xflo->dst_orig); if (IS_ERR(xdst)) { err = PTR_ERR(xdst); if (err == -EREMOTE) { xfrm_pols_put(pols, num_pols); return NULL; } if (err != -EAGAIN) goto error; goto make_dummy_bundle; } else if (xdst == NULL) { num_xfrms = 0; goto make_dummy_bundle; } return xdst; make_dummy_bundle: /* We found policies, but there's no bundles to instantiate: * either because the policy blocks, has no transformations or * we could not build template (no xfrm_states).*/ xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family); if (IS_ERR(xdst)) { xfrm_pols_put(pols, num_pols); return ERR_CAST(xdst); } xdst->num_pols = num_pols; xdst->num_xfrms = num_xfrms; memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); return xdst; inc_error: XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); error: xfrm_pols_put(pols, num_pols); return ERR_PTR(err); } static struct dst_entry *make_blackhole(struct net *net, u16 family, struct dst_entry *dst_orig) { const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); struct dst_entry *ret; if (!afinfo) { dst_release(dst_orig); return ERR_PTR(-EINVAL); } else { ret = afinfo->blackhole_route(net, dst_orig); } rcu_read_unlock(); return ret; } /* Finds/creates a bundle for given flow and if_id * * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. * * xfrm_lookup uses an if_id of 0 by default, and is provided for * compatibility */ struct dst_entry *xfrm_lookup_with_ifid(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, const struct sock *sk, int flags, u32 if_id) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct xfrm_dst *xdst; struct dst_entry *dst, *route; u16 family = dst_orig->ops->family; u8 dir = XFRM_POLICY_OUT; int i, err, num_pols, num_xfrms = 0, drop_pols = 0; dst = NULL; xdst = NULL; route = NULL; sk = sk_const_to_full_sk(sk); if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { num_pols = 1; pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family, if_id); err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); if (err < 0) goto dropdst; if (num_pols) { if (num_xfrms <= 0) { drop_pols = num_pols; goto no_transform; } xdst = xfrm_resolve_and_create_bundle( pols, num_pols, fl, family, dst_orig); if (IS_ERR(xdst)) { xfrm_pols_put(pols, num_pols); err = PTR_ERR(xdst); if (err == -EREMOTE) goto nopol; goto dropdst; } else if (xdst == NULL) { num_xfrms = 0; drop_pols = num_pols; goto no_transform; } route = xdst->route; } } if (xdst == NULL) { struct xfrm_flo xflo; xflo.dst_orig = dst_orig; xflo.flags = flags; /* To accelerate a bit... 
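(skip the policy lookup entirely when no specific if_id was requested and either the route is flagged DST_NOXFRM or this namespace has no output policies installed)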
*/ if (!if_id && ((dst_orig->flags & DST_NOXFRM) || !net->xfrm.policy_count[XFRM_POLICY_OUT])) goto nopol; xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo, if_id); if (xdst == NULL) goto nopol; if (IS_ERR(xdst)) { err = PTR_ERR(xdst); goto dropdst; } num_pols = xdst->num_pols; num_xfrms = xdst->num_xfrms; memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols); route = xdst->route; } dst = &xdst->u.dst; if (route == NULL && num_xfrms > 0) { /* The only case when xfrm_bundle_lookup() returns a * bundle with null route, is when the template could * not be resolved. It means policies are there, but * bundle could not be created, since we don't yet * have the xfrm_state's. We need to wait for KM to * negotiate new SA's or bail out with error.*/ if (net->xfrm.sysctl_larval_drop) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); err = -EREMOTE; goto error; } err = -EAGAIN; XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); goto error; } no_transform: if (num_pols == 0) goto nopol; if ((flags & XFRM_LOOKUP_ICMP) && !(pols[0]->flags & XFRM_POLICY_ICMP)) { err = -ENOENT; goto error; } for (i = 0; i < num_pols; i++) WRITE_ONCE(pols[i]->curlft.use_time, ktime_get_real_seconds()); if (num_xfrms < 0) { /* Prohibit the flow */ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); err = -EPERM; goto error; } else if (num_xfrms > 0) { /* Flow transformed */ dst_release(dst_orig); } else { /* Flow passes untransformed */ dst_release(dst); dst = dst_orig; } ok: xfrm_pols_put(pols, drop_pols); if (dst->xfrm && (dst->xfrm->props.mode == XFRM_MODE_TUNNEL || dst->xfrm->props.mode == XFRM_MODE_IPTFS)) dst->flags |= DST_XFRM_TUNNEL; return dst; nopol: if ((!dst_orig->dev || !(dst_orig->dev->flags & IFF_LOOPBACK)) && net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) { err = -EPERM; goto error; } if (!(flags & XFRM_LOOKUP_ICMP)) { dst = dst_orig; goto ok; } err = -ENOENT; error: dst_release(dst); dropdst: if (!(flags & XFRM_LOOKUP_KEEP_DST_REF)) dst_release(dst_orig); xfrm_pols_put(pols, drop_pols); return ERR_PTR(err); } EXPORT_SYMBOL(xfrm_lookup_with_ifid); /* Main function: finds/creates a bundle for given flow. * * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. */ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, const struct sock *sk, int flags) { return xfrm_lookup_with_ifid(net, dst_orig, fl, sk, flags, 0); } EXPORT_SYMBOL(xfrm_lookup); /* Callers of xfrm_lookup_route() must ensure a call to dst_output(). * Otherwise we may send out blackholed packets. */ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, const struct sock *sk, int flags) { struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, flags | XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_KEEP_DST_REF); if (PTR_ERR(dst) == -EREMOTE) return make_blackhole(net, dst_orig->ops->family, dst_orig); if (IS_ERR(dst)) dst_release(dst_orig); return dst; } EXPORT_SYMBOL(xfrm_lookup_route); static inline int xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl) { struct sec_path *sp = skb_sec_path(skb); struct xfrm_state *x; if (!sp || idx < 0 || idx >= sp->len) return 0; x = sp->xvec[idx]; if (!x->type->reject) return 0; return x->type->reject(x, skb, fl); } /* When skb is transformed back to its "native" form, we have to * check policy restrictions. At the moment we make this in maximally * stupid way. Shame on me. :-) Of course, connected sockets must * have policy cached at them. 
*/ static inline int xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, unsigned short family, u32 if_id) { if (xfrm_state_kern(x)) return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family); return x->id.proto == tmpl->id.proto && (x->id.spi == tmpl->id.spi || !tmpl->id.spi) && (x->props.reqid == tmpl->reqid || !tmpl->reqid) && x->props.mode == tmpl->mode && (tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) || !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) && !(x->props.mode != XFRM_MODE_TRANSPORT && xfrm_state_addr_cmp(tmpl, x, family)) && (if_id == 0 || if_id == x->if_id); } /* * 0 or more than 0 is returned when validation is succeeded (either bypass * because of optional transport mode, or next index of the matched secpath * state with the template. * -1 is returned when no matching template is found. * Otherwise "-2 - errored_index" is returned. */ static inline int xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start, unsigned short family, u32 if_id) { int idx = start; if (tmpl->optional) { if (tmpl->mode == XFRM_MODE_TRANSPORT) return start; } else start = -1; for (; idx < sp->len; idx++) { if (xfrm_state_ok(tmpl, sp->xvec[idx], family, if_id)) return ++idx; if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) { if (idx < sp->verified_cnt) { /* Secpath entry previously verified, consider optional and * continue searching */ continue; } if (start == -1) start = -2-idx; break; } } return start; } static void decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse) { struct flowi4 *fl4 = &fl->u.ip4; memset(fl4, 0, sizeof(struct flowi4)); if (reverse) { fl4->saddr = flkeys->addrs.ipv4.dst; fl4->daddr = flkeys->addrs.ipv4.src; fl4->fl4_sport = flkeys->ports.dst; fl4->fl4_dport = flkeys->ports.src; } else { fl4->saddr = flkeys->addrs.ipv4.src; fl4->daddr = flkeys->addrs.ipv4.dst; fl4->fl4_sport = flkeys->ports.src; fl4->fl4_dport = flkeys->ports.dst; } switch (flkeys->basic.ip_proto) { case IPPROTO_GRE: fl4->fl4_gre_key = flkeys->gre.keyid; break; case IPPROTO_ICMP: fl4->fl4_icmp_type = flkeys->icmp.type; fl4->fl4_icmp_code = flkeys->icmp.code; break; } fl4->flowi4_proto = flkeys->basic.ip_proto; fl4->flowi4_dscp = inet_dsfield_to_dscp(flkeys->ip.tos); } #if IS_ENABLED(CONFIG_IPV6) static void decode_session6(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse) { struct flowi6 *fl6 = &fl->u.ip6; memset(fl6, 0, sizeof(struct flowi6)); if (reverse) { fl6->saddr = flkeys->addrs.ipv6.dst; fl6->daddr = flkeys->addrs.ipv6.src; fl6->fl6_sport = flkeys->ports.dst; fl6->fl6_dport = flkeys->ports.src; } else { fl6->saddr = flkeys->addrs.ipv6.src; fl6->daddr = flkeys->addrs.ipv6.dst; fl6->fl6_sport = flkeys->ports.src; fl6->fl6_dport = flkeys->ports.dst; } switch (flkeys->basic.ip_proto) { case IPPROTO_GRE: fl6->fl6_gre_key = flkeys->gre.keyid; break; case IPPROTO_ICMPV6: fl6->fl6_icmp_type = flkeys->icmp.type; fl6->fl6_icmp_code = flkeys->icmp.code; break; } fl6->flowi6_proto = flkeys->basic.ip_proto; } #endif int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family, int reverse) { struct xfrm_flow_keys flkeys; memset(&flkeys, 0, sizeof(flkeys)); __skb_flow_dissect(net, skb, &xfrm_session_dissector, &flkeys, NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_ENCAP); switch (family) { case AF_INET: decode_session4(&flkeys, fl, reverse); break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: decode_session6(&flkeys, fl, reverse); break; #endif 
default: return -EAFNOSUPPORT; } fl->flowi_mark = skb->mark; if (reverse) { fl->flowi_oif = skb->skb_iif; } else { int oif = 0; if (skb_dst(skb) && skb_dst(skb)->dev) oif = skb_dst(skb)->dev->ifindex; fl->flowi_oif = oif; } return security_xfrm_decode_session(skb, &fl->flowi_secid); } EXPORT_SYMBOL(__xfrm_decode_session); static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp) { for (; k < sp->len; k++) { if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { *idxp = k; return 1; } } return 0; } static bool icmp_err_packet(const struct flowi *fl, unsigned short family) { const struct flowi4 *fl4 = &fl->u.ip4; if (family == AF_INET && fl4->flowi4_proto == IPPROTO_ICMP && (fl4->fl4_icmp_type == ICMP_DEST_UNREACH || fl4->fl4_icmp_type == ICMP_TIME_EXCEEDED)) return true; #if IS_ENABLED(CONFIG_IPV6) if (family == AF_INET6) { const struct flowi6 *fl6 = &fl->u.ip6; if (fl6->flowi6_proto == IPPROTO_ICMPV6 && (fl6->fl6_icmp_type == ICMPV6_DEST_UNREACH || fl6->fl6_icmp_type == ICMPV6_PKT_TOOBIG || fl6->fl6_icmp_type == ICMPV6_TIME_EXCEED)) return true; } #endif return false; } static bool xfrm_icmp_flow_decode(struct sk_buff *skb, unsigned short family, const struct flowi *fl, struct flowi *fl1) { bool ret = true; struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); int hl = family == AF_INET ? (sizeof(struct iphdr) + sizeof(struct icmphdr)) : (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr)); if (!newskb) return true; if (!pskb_pull(newskb, hl)) goto out; skb_reset_network_header(newskb); if (xfrm_decode_session_reverse(dev_net(skb->dev), newskb, fl1, family) < 0) goto out; fl1->flowi_oif = fl->flowi_oif; fl1->flowi_mark = fl->flowi_mark; fl1->flowi_dscp = fl->flowi_dscp; nf_nat_decode_session(newskb, fl1, family); ret = false; out: consume_skb(newskb); return ret; } static bool xfrm_selector_inner_icmp_match(struct sk_buff *skb, unsigned short family, const struct xfrm_selector *sel, const struct flowi *fl) { bool ret = false; if (icmp_err_packet(fl, family)) { struct flowi fl1; if (xfrm_icmp_flow_decode(skb, family, fl, &fl1)) return ret; ret = xfrm_selector_match(sel, &fl1, family); } return ret; } static inline struct xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb, const struct flowi *fl, unsigned short family, u32 if_id) { struct xfrm_policy *pol = NULL; if (icmp_err_packet(fl, family)) { struct flowi fl1; struct net *net = dev_net(skb->dev); if (xfrm_icmp_flow_decode(skb, family, fl, &fl1)) return pol; pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id); if (IS_ERR(pol)) pol = NULL; } return pol; } static inline struct dst_entry *xfrm_out_fwd_icmp(struct sk_buff *skb, struct flowi *fl, unsigned short family, struct dst_entry *dst) { if (icmp_err_packet(fl, family)) { struct net *net = dev_net(skb->dev); struct dst_entry *dst2; struct flowi fl1; if (xfrm_icmp_flow_decode(skb, family, fl, &fl1)) return dst; dst_hold(dst); dst2 = xfrm_lookup(net, dst, &fl1, NULL, (XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_ICMP)); if (IS_ERR(dst2)) return dst; if (dst2->xfrm) { dst_release(dst); dst = dst2; } else { dst_release(dst2); } } return dst; } int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { struct net *net = dev_net(skb->dev); struct xfrm_policy *pol; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int npols = 0; int xfrm_nr; int pi; int reverse; struct flowi fl; int xerr_idx = -1; const struct xfrm_if_cb *ifcb; struct sec_path *sp; u32 if_id = 0; rcu_read_lock(); ifcb = xfrm_if_get_cb(); if (ifcb) { struct 
xfrm_if_decode_session_result r; if (ifcb->decode_session(skb, family, &r)) { if_id = r.if_id; net = r.net; } } rcu_read_unlock(); reverse = dir & ~XFRM_POLICY_MASK; dir &= XFRM_POLICY_MASK; if (__xfrm_decode_session(net, skb, &fl, family, reverse) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); return 0; } nf_nat_decode_session(skb, &fl, family); /* First, check used SA against their selectors. */ sp = skb_sec_path(skb); if (sp) { int i; for (i = sp->len - 1; i >= 0; i--) { struct xfrm_state *x = sp->xvec[i]; int ret = 0; if (!xfrm_selector_match(&x->sel, &fl, family)) { ret = 1; if (x->props.flags & XFRM_STATE_ICMP && xfrm_selector_inner_icmp_match(skb, family, &x->sel, &fl)) ret = 0; if (ret) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); return 0; } } } } pol = NULL; sk = sk_to_full_sk(sk); if (sk && sk->sk_policy[dir]) { pol = xfrm_sk_policy_lookup(sk, dir, &fl, family, if_id); if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } } if (!pol) pol = xfrm_policy_lookup(net, &fl, family, dir, if_id); if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } if (!pol && dir == XFRM_POLICY_FWD) pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id); if (!pol) { const bool is_crypto_offload = sp && (xfrm_input_state(skb)->xso.type == XFRM_DEV_OFFLOAD_CRYPTO); if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; } if (sp && secpath_has_nontransport(sp, 0, &xerr_idx) && !is_crypto_offload) { xfrm_secpath_reject(xerr_idx, skb, &fl); XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; } return 1; } /* This lockless write can happen from different cpus. */ WRITE_ONCE(pol->curlft.use_time, ktime_get_real_seconds()); pols[0] = pol; npols++; #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, &fl, family, XFRM_POLICY_IN, if_id); if (pols[1]) { if (IS_ERR(pols[1])) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); xfrm_pol_put(pols[0]); return 0; } /* This write can happen from different cpus. */ WRITE_ONCE(pols[1]->curlft.use_time, ktime_get_real_seconds()); npols++; } } #endif if (pol->action == XFRM_POLICY_ALLOW) { static struct sec_path dummy; struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; struct xfrm_tmpl **tpp = tp; int ti = 0; int i, k; sp = skb_sec_path(skb); if (!sp) sp = &dummy; for (pi = 0; pi < npols; pi++) { if (pols[pi] != pol && pols[pi]->action != XFRM_POLICY_ALLOW) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); goto reject; } if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto reject_error; } for (i = 0; i < pols[pi]->xfrm_nr; i++) tpp[ti++] = &pols[pi]->xfrm_vec[i]; } xfrm_nr = ti; if (npols > 1) { xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); tpp = stp; } /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. * Order is _important_. Later we will implement * some barriers, but at the moment barriers * are implied between each two transformations. * Upon success, marks secpath entries as having been * verified to allow them to be skipped in future policy * checks (e.g. nested tunnels). 
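 * xfrm_policy_ok() returns the next secpath index to resume from on a
 * match, -1 when no matching state was found for the template, and
 * "-2 - errored_index" when a non-transport secpath entry gets in the
 * way; __xfrm_policy_check() decodes the latter (k < -1) into xerr_idx
 * for xfrm_secpath_reject().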
*/ for (i = xfrm_nr-1, k = 0; i >= 0; i--) { k = xfrm_policy_ok(tpp[i], sp, k, family, if_id); if (k < 0) { if (k < -1) /* "-2 - errored_index" returned */ xerr_idx = -(2+k); XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; } } if (secpath_has_nontransport(sp, k, &xerr_idx)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; } xfrm_pols_put(pols, npols); sp->verified_cnt = k; return 1; } XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); reject: xfrm_secpath_reject(xerr_idx, skb, &fl); reject_error: xfrm_pols_put(pols, npols); return 0; } EXPORT_SYMBOL(__xfrm_policy_check); int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) { struct net *net = dev_net(skb->dev); struct flowi fl; struct dst_entry *dst; int res = 1; if (xfrm_decode_session(net, skb, &fl, family) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); return 0; } skb_dst_force(skb); dst = skb_dst(skb); if (!dst) { XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); return 0; } /* ignore return value from skb_dstref_steal, xfrm_lookup takes * care of dropping the refcnt if needed. */ skb_dstref_steal(skb); dst = xfrm_lookup(net, dst, &fl, NULL, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) { res = 0; dst = NULL; } if (dst && !dst->xfrm) dst = xfrm_out_fwd_icmp(skb, &fl, family, dst); skb_dst_set(skb, dst); return res; } EXPORT_SYMBOL(__xfrm_route_forward); /* Optimize later using cookies and generation ids. */ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) { /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to * get validated by dst_ops->check on every use. We do this * because when a normal route referenced by an XFRM dst is * obsoleted we do not go looking around for all parent * referencing XFRM dsts so that we can invalidate them. It * is just too much work. Instead we make the checks here on * every use. For example: * * XFRM dst A --> IPv4 dst X * * X is the "xdst->route" of A (X is also the "dst->path" of A * in this example). If X is marked obsolete, "A" will not * notice. That's what we are validating here via the * stale_bundle() check. * * When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will * be marked on it. * This will force stale_bundle() to fail on any xdst bundle with * this dst linked in it. */ if (READ_ONCE(dst->obsolete) < 0 && !stale_bundle(dst)) return dst; return NULL; } static int stale_bundle(struct dst_entry *dst) { return !xfrm_bundle_ok((struct xfrm_dst *)dst); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) { dst->dev = blackhole_netdev; dev_hold(dst->dev); dev_put(dev); } } EXPORT_SYMBOL(xfrm_dst_ifdown); static void xfrm_link_failure(struct sk_buff *skb) { /* Impossible. Such dst must be popped before reaches point of failure. 
*/ } static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst) { if (READ_ONCE(dst->obsolete)) sk_dst_reset(sk); } static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr) { while (nr--) { struct xfrm_dst *xdst = bundle[nr]; u32 pmtu, route_mtu_cached; struct dst_entry *dst; dst = &xdst->u.dst; pmtu = dst_mtu(xfrm_dst_child(dst)); xdst->child_mtu_cached = pmtu; pmtu = xfrm_state_mtu(dst->xfrm, pmtu); route_mtu_cached = dst_mtu(xdst->route); xdst->route_mtu_cached = route_mtu_cached; if (pmtu > route_mtu_cached) pmtu = route_mtu_cached; dst_metric_set(dst, RTAX_MTU, pmtu); } } /* Check that the bundle accepts the flow and its components are * still valid. */ static int xfrm_bundle_ok(struct xfrm_dst *first) { struct xfrm_dst *bundle[XFRM_MAX_DEPTH]; struct dst_entry *dst = &first->u.dst; struct xfrm_dst *xdst; int start_from, nr; u32 mtu; if (!dst_check(xfrm_dst_path(dst), ((struct xfrm_dst *)dst)->path_cookie) || (dst->dev && !netif_running(dst->dev))) return 0; if (dst->flags & DST_XFRM_QUEUE) return 1; start_from = nr = 0; do { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; if (xdst->xfrm_genid != dst->xfrm->genid) return 0; if (xdst->num_pols > 0 && xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) return 0; bundle[nr++] = xdst; mtu = dst_mtu(xfrm_dst_child(dst)); if (xdst->child_mtu_cached != mtu) { start_from = nr; xdst->child_mtu_cached = mtu; } if (!dst_check(xdst->route, xdst->route_cookie)) return 0; mtu = dst_mtu(xdst->route); if (xdst->route_mtu_cached != mtu) { start_from = nr; xdst->route_mtu_cached = mtu; } dst = xfrm_dst_child(dst); } while (dst->xfrm); if (likely(!start_from)) return 1; xdst = bundle[start_from - 1]; mtu = xdst->child_mtu_cached; while (start_from--) { dst = &xdst->u.dst; mtu = xfrm_state_mtu(dst->xfrm, mtu); if (mtu > xdst->route_mtu_cached) mtu = xdst->route_mtu_cached; dst_metric_set(dst, RTAX_MTU, mtu); if (!start_from) break; xdst = bundle[start_from - 1]; xdst->child_mtu_cached = mtu; } return 1; } static unsigned int xfrm_default_advmss(const struct dst_entry *dst) { return dst_metric_advmss(xfrm_dst_path(dst)); } static unsigned int xfrm_mtu(const struct dst_entry *dst) { unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); return mtu ? 
: dst_mtu(xfrm_dst_path(dst)); } static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst, const void *daddr) { while (dst->xfrm) { const struct xfrm_state *xfrm = dst->xfrm; dst = xfrm_dst_child(dst); if (xfrm->props.mode == XFRM_MODE_TRANSPORT) continue; if (xfrm->type->flags & XFRM_TYPE_REMOTE_COADDR) daddr = xfrm->coaddr; else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR)) daddr = &xfrm->id.daddr; } return daddr; } static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr) { const struct dst_entry *path = xfrm_dst_path(dst); if (!skb) daddr = xfrm_get_dst_nexthop(dst, daddr); return path->ops->neigh_lookup(path, skb, daddr); } static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr) { const struct dst_entry *path = xfrm_dst_path(dst); daddr = xfrm_get_dst_nexthop(dst, daddr); path->ops->confirm_neigh(path, daddr); } int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int family) { int err = 0; if (WARN_ON(family >= ARRAY_SIZE(xfrm_policy_afinfo))) return -EAFNOSUPPORT; spin_lock(&xfrm_policy_afinfo_lock); if (unlikely(xfrm_policy_afinfo[family] != NULL)) err = -EEXIST; else { struct dst_ops *dst_ops = afinfo->dst_ops; if (likely(dst_ops->kmem_cachep == NULL)) dst_ops->kmem_cachep = xfrm_dst_cache; if (likely(dst_ops->check == NULL)) dst_ops->check = xfrm_dst_check; if (likely(dst_ops->default_advmss == NULL)) dst_ops->default_advmss = xfrm_default_advmss; if (likely(dst_ops->mtu == NULL)) dst_ops->mtu = xfrm_mtu; if (likely(dst_ops->negative_advice == NULL)) dst_ops->negative_advice = xfrm_negative_advice; if (likely(dst_ops->link_failure == NULL)) dst_ops->link_failure = xfrm_link_failure; if (likely(dst_ops->neigh_lookup == NULL)) dst_ops->neigh_lookup = xfrm_neigh_lookup; if (likely(!dst_ops->confirm_neigh)) dst_ops->confirm_neigh = xfrm_confirm_neigh; rcu_assign_pointer(xfrm_policy_afinfo[family], afinfo); } spin_unlock(&xfrm_policy_afinfo_lock); return err; } EXPORT_SYMBOL(xfrm_policy_register_afinfo); void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo) { struct dst_ops *dst_ops = afinfo->dst_ops; int i; for (i = 0; i < ARRAY_SIZE(xfrm_policy_afinfo); i++) { if (xfrm_policy_afinfo[i] != afinfo) continue; RCU_INIT_POINTER(xfrm_policy_afinfo[i], NULL); break; } synchronize_rcu(); dst_ops->kmem_cachep = NULL; dst_ops->check = NULL; dst_ops->negative_advice = NULL; dst_ops->link_failure = NULL; } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb) { spin_lock(&xfrm_if_cb_lock); rcu_assign_pointer(xfrm_if_cb, ifcb); spin_unlock(&xfrm_if_cb_lock); } EXPORT_SYMBOL(xfrm_if_register_cb); void xfrm_if_unregister_cb(void) { RCU_INIT_POINTER(xfrm_if_cb, NULL); synchronize_rcu(); } EXPORT_SYMBOL(xfrm_if_unregister_cb); #ifdef CONFIG_XFRM_STATISTICS static int __net_init xfrm_statistics_init(struct net *net) { int rv; net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib); if (!net->mib.xfrm_statistics) return -ENOMEM; rv = xfrm_proc_init(net); if (rv < 0) free_percpu(net->mib.xfrm_statistics); return rv; } static void xfrm_statistics_fini(struct net *net) { xfrm_proc_fini(net); free_percpu(net->mib.xfrm_statistics); } #else static int __net_init xfrm_statistics_init(struct net *net) { return 0; } static void xfrm_statistics_fini(struct net *net) { } #endif static int __net_init xfrm_policy_init(struct net *net) { unsigned int hmask, sz; int dir, err; if (net_eq(net, &init_net)) { xfrm_dst_cache = 
KMEM_CACHE(xfrm_dst, SLAB_HWCACHE_ALIGN | SLAB_PANIC); err = rhashtable_init(&xfrm_policy_inexact_table, &xfrm_pol_inexact_params); BUG_ON(err); } hmask = 8 - 1; sz = (hmask+1) * sizeof(struct hlist_head); net->xfrm.policy_byidx = xfrm_hash_alloc(sz); if (!net->xfrm.policy_byidx) goto out_byidx; net->xfrm.policy_idx_hmask = hmask; for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy_hash *htab; net->xfrm.policy_count[dir] = 0; net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0; htab = &net->xfrm.policy_bydst[dir]; htab->table = xfrm_hash_alloc(sz); if (!htab->table) goto out_bydst; htab->hmask = hmask; htab->dbits4 = 32; htab->sbits4 = 32; htab->dbits6 = 128; htab->sbits6 = 128; } net->xfrm.policy_hthresh.lbits4 = 32; net->xfrm.policy_hthresh.rbits4 = 32; net->xfrm.policy_hthresh.lbits6 = 128; net->xfrm.policy_hthresh.rbits6 = 128; seqlock_init(&net->xfrm.policy_hthresh.lock); INIT_LIST_HEAD(&net->xfrm.policy_all); INIT_LIST_HEAD(&net->xfrm.inexact_bins); INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild); return 0; out_bydst: for (dir--; dir >= 0; dir--) { struct xfrm_policy_hash *htab; htab = &net->xfrm.policy_bydst[dir]; xfrm_hash_free(htab->table, sz); } xfrm_hash_free(net->xfrm.policy_byidx, sz); out_byidx: return -ENOMEM; } static void xfrm_policy_fini(struct net *net) { struct xfrm_pol_inexact_bin *b, *t; unsigned int sz; int dir; flush_work(&net->xfrm.policy_hash_work); #ifdef CONFIG_XFRM_SUB_POLICY xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false); #endif xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false); WARN_ON(!list_empty(&net->xfrm.policy_all)); for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy_hash *htab; htab = &net->xfrm.policy_bydst[dir]; sz = (htab->hmask + 1) * sizeof(struct hlist_head); WARN_ON(!hlist_empty(htab->table)); xfrm_hash_free(htab->table, sz); } sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); WARN_ON(!hlist_empty(net->xfrm.policy_byidx)); xfrm_hash_free(net->xfrm.policy_byidx, sz); spin_lock_bh(&net->xfrm.xfrm_policy_lock); list_for_each_entry_safe(b, t, &net->xfrm.inexact_bins, inexact_bins) __xfrm_policy_inexact_prune_bin(b, true); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); } static int __net_init xfrm_net_init(struct net *net) { int rv; /* Initialize the per-net locks here */ spin_lock_init(&net->xfrm.xfrm_state_lock); spin_lock_init(&net->xfrm.xfrm_policy_lock); seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock); mutex_init(&net->xfrm.xfrm_cfg_mutex); net->xfrm.policy_default[XFRM_POLICY_IN] = XFRM_USERPOLICY_ACCEPT; net->xfrm.policy_default[XFRM_POLICY_FWD] = XFRM_USERPOLICY_ACCEPT; net->xfrm.policy_default[XFRM_POLICY_OUT] = XFRM_USERPOLICY_ACCEPT; rv = xfrm_statistics_init(net); if (rv < 0) goto out_statistics; rv = xfrm_state_init(net); if (rv < 0) goto out_state; rv = xfrm_policy_init(net); if (rv < 0) goto out_policy; rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; rv = xfrm_nat_keepalive_net_init(net); if (rv < 0) goto out_nat_keepalive; return 0; out_nat_keepalive: xfrm_sysctl_fini(net); out_sysctl: xfrm_policy_fini(net); out_policy: xfrm_state_fini(net); out_state: xfrm_statistics_fini(net); out_statistics: return rv; } static void __net_exit xfrm_net_exit(struct net *net) { xfrm_nat_keepalive_net_fini(net); xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); xfrm_statistics_fini(net); } static struct pernet_operations __net_initdata xfrm_net_ops = { .init = 
xfrm_net_init, .exit = xfrm_net_exit, }; static const struct flow_dissector_key xfrm_flow_dissector_keys[] = { { .key_id = FLOW_DISSECTOR_KEY_CONTROL, .offset = offsetof(struct xfrm_flow_keys, control), }, { .key_id = FLOW_DISSECTOR_KEY_BASIC, .offset = offsetof(struct xfrm_flow_keys, basic), }, { .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, .offset = offsetof(struct xfrm_flow_keys, addrs.ipv4), }, { .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, .offset = offsetof(struct xfrm_flow_keys, addrs.ipv6), }, { .key_id = FLOW_DISSECTOR_KEY_PORTS, .offset = offsetof(struct xfrm_flow_keys, ports), }, { .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID, .offset = offsetof(struct xfrm_flow_keys, gre), }, { .key_id = FLOW_DISSECTOR_KEY_IP, .offset = offsetof(struct xfrm_flow_keys, ip), }, { .key_id = FLOW_DISSECTOR_KEY_ICMP, .offset = offsetof(struct xfrm_flow_keys, icmp), }, }; void __init xfrm_init(void) { skb_flow_dissector_init(&xfrm_session_dissector, xfrm_flow_dissector_keys, ARRAY_SIZE(xfrm_flow_dissector_keys)); register_pernet_subsys(&xfrm_net_ops); xfrm_dev_init(); xfrm_input_init(); #ifdef CONFIG_XFRM_ESPINTCP espintcp_init(); #endif register_xfrm_state_bpf(); xfrm_nat_keepalive_init(AF_INET); } #ifdef CONFIG_AUDITSYSCALL static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, struct audit_buffer *audit_buf) { struct xfrm_sec_ctx *ctx = xp->security; struct xfrm_selector *sel = &xp->selector; if (ctx) audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str); switch (sel->family) { case AF_INET: audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4); if (sel->prefixlen_s != 32) audit_log_format(audit_buf, " src_prefixlen=%d", sel->prefixlen_s); audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4); if (sel->prefixlen_d != 32) audit_log_format(audit_buf, " dst_prefixlen=%d", sel->prefixlen_d); break; case AF_INET6: audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6); if (sel->prefixlen_s != 128) audit_log_format(audit_buf, " src_prefixlen=%d", sel->prefixlen_s); audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6); if (sel->prefixlen_d != 128) audit_log_format(audit_buf, " dst_prefixlen=%d", sel->prefixlen_d); break; } } void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-add"); if (audit_buf == NULL) return; xfrm_audit_helper_usrinfo(task_valid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); } EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-delete"); if (audit_buf == NULL) return; xfrm_audit_helper_usrinfo(task_valid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); } EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete); #endif #ifdef CONFIG_XFRM_MIGRATE static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, u8 dir, u8 type, struct net *net, u32 if_id) { struct xfrm_policy *pol; struct flowi fl; memset(&fl, 0, sizeof(fl)); fl.flowi_proto = sel->proto; switch (sel->family) { case AF_INET: fl.u.ip4.saddr = sel->saddr.a4; fl.u.ip4.daddr = sel->daddr.a4; if (sel->proto == IPSEC_ULPROTO_ANY) break; fl.u.flowi4_oif = sel->ifindex; fl.u.ip4.fl4_sport = sel->sport; fl.u.ip4.fl4_dport = sel->dport; break; case AF_INET6: 
fl.u.ip6.saddr = sel->saddr.in6; fl.u.ip6.daddr = sel->daddr.in6; if (sel->proto == IPSEC_ULPROTO_ANY) break; fl.u.flowi6_oif = sel->ifindex; fl.u.ip6.fl4_sport = sel->sport; fl.u.ip6.fl4_dport = sel->dport; break; default: return ERR_PTR(-EAFNOSUPPORT); } rcu_read_lock(); pol = xfrm_policy_lookup_bytype(net, type, &fl, sel->family, dir, if_id); if (IS_ERR_OR_NULL(pol)) goto out_unlock; if (!xfrm_pol_hold_rcu(pol)) pol = NULL; out_unlock: rcu_read_unlock(); return pol; } static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t) { int match = 0; if (t->mode == m->mode && t->id.proto == m->proto && (m->reqid == 0 || t->reqid == m->reqid)) { switch (t->mode) { case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: case XFRM_MODE_IPTFS: if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr, m->old_family) && xfrm_addr_equal(&t->saddr, &m->old_saddr, m->old_family)) { match = 1; } break; case XFRM_MODE_TRANSPORT: /* in case of transport mode, template does not store any IP addresses, hence we just compare mode and protocol */ match = 1; break; default: break; } } return match; } /* update endpoint address(es) of template(s) */ static int xfrm_policy_migrate(struct xfrm_policy *pol, struct xfrm_migrate *m, int num_migrate, struct netlink_ext_ack *extack) { struct xfrm_migrate *mp; int i, j, n = 0; write_lock_bh(&pol->lock); if (unlikely(pol->walk.dead)) { /* target policy has been deleted */ NL_SET_ERR_MSG(extack, "Target policy not found"); write_unlock_bh(&pol->lock); return -ENOENT; } for (i = 0; i < pol->xfrm_nr; i++) { for (j = 0, mp = m; j < num_migrate; j++, mp++) { if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i])) continue; n++; if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL && pol->xfrm_vec[i].mode != XFRM_MODE_BEET && pol->xfrm_vec[i].mode != XFRM_MODE_IPTFS) continue; /* update endpoints */ memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr, sizeof(pol->xfrm_vec[i].id.daddr)); memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr, sizeof(pol->xfrm_vec[i].saddr)); pol->xfrm_vec[i].encap_family = mp->new_family; /* flush bundles */ atomic_inc(&pol->genid); } } write_unlock_bh(&pol->lock); if (!n) return -ENODATA; return 0; } static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate, struct netlink_ext_ack *extack) { int i, j; if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH) { NL_SET_ERR_MSG(extack, "Invalid number of SAs to migrate, must be 0 < num <= XFRM_MAX_DEPTH (6)"); return -EINVAL; } for (i = 0; i < num_migrate; i++) { if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) || xfrm_addr_any(&m[i].new_saddr, m[i].new_family)) { NL_SET_ERR_MSG(extack, "Addresses in the MIGRATE attribute's list cannot be null"); return -EINVAL; } /* check if there is any duplicated entry */ for (j = i + 1; j < num_migrate; j++) { if (!memcmp(&m[i].old_daddr, &m[j].old_daddr, sizeof(m[i].old_daddr)) && !memcmp(&m[i].old_saddr, &m[j].old_saddr, sizeof(m[i].old_saddr)) && m[i].proto == m[j].proto && m[i].mode == m[j].mode && m[i].reqid == m[j].reqid && m[i].old_family == m[j].old_family) { NL_SET_ERR_MSG(extack, "Entries in the MIGRATE attribute's list must be unique"); return -EINVAL; } } } return 0; } int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate, struct xfrm_kmaddress *k, struct net *net, struct xfrm_encap_tmpl *encap, u32 if_id, struct netlink_ext_ack *extack, struct xfrm_user_offload *xuo) { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; struct xfrm_state *x, *xc; struct xfrm_state 
*x_cur[XFRM_MAX_DEPTH]; struct xfrm_state *x_new[XFRM_MAX_DEPTH]; struct xfrm_migrate *mp; /* Stage 0 - sanity checks */ err = xfrm_migrate_check(m, num_migrate, extack); if (err < 0) goto out; if (dir >= XFRM_POLICY_MAX) { NL_SET_ERR_MSG(extack, "Invalid policy direction"); err = -EINVAL; goto out; } /* Stage 1 - find policy */ pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id); if (IS_ERR_OR_NULL(pol)) { NL_SET_ERR_MSG(extack, "Target policy not found"); err = IS_ERR(pol) ? PTR_ERR(pol) : -ENOENT; goto out; } /* Stage 2 - find and update state(s) */ for (i = 0, mp = m; i < num_migrate; i++, mp++) { if ((x = xfrm_migrate_state_find(mp, net, if_id))) { x_cur[nx_cur] = x; nx_cur++; xc = xfrm_state_migrate(x, mp, encap, net, xuo, extack); if (xc) { x_new[nx_new] = xc; nx_new++; } else { err = -ENODATA; goto restore_state; } } } /* Stage 3 - update policy */ err = xfrm_policy_migrate(pol, m, num_migrate, extack); if (err < 0) goto restore_state; /* Stage 4 - delete old state(s) */ if (nx_cur) { xfrm_states_put(x_cur, nx_cur); xfrm_states_delete(x_cur, nx_cur); } /* Stage 5 - announce */ km_migrate(sel, dir, type, m, num_migrate, k, encap); xfrm_pol_put(pol); return 0; out: return err; restore_state: if (pol) xfrm_pol_put(pol); if (nx_cur) xfrm_states_put(x_cur, nx_cur); if (nx_new) xfrm_states_delete(x_new, nx_new); return err; } EXPORT_SYMBOL(xfrm_migrate); #endif
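The tie-breaking rule used during output policy lookup is easy to miss in the code above, so here is a small user-space sketch of it. This is illustrative only, not kernel code: the struct, the names and the sample values are made up, and only the selection logic of __xfrm_policy_eval_candidates()/xfrm_policy_eval_candidates() is mirrored. Each candidate chain is walked in ascending priority order, a non-matching entry is skipped, and when two matching policies share a priority the one with the smaller pos (the older one) wins.

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

/* Minimal stand-in for the fields the kernel lookup actually compares. */
struct pol {
	const char *name;   /* illustrative label only */
	uint32_t priority;  /* lower value wins */
	uint32_t pos;       /* insertion order: lower means older */
	int match;          /* 1 if the selector would match this flow */
};

/* Mirror of __xfrm_policy_eval_candidates(): walk one candidate chain
 * (kept in ascending priority order), stop once the remaining entries
 * cannot beat the current best, and on a priority tie keep the older
 * (lower pos) policy. Error propagation is omitted here. */
static const struct pol *eval_chain(const struct pol *chain, size_t n,
				    const struct pol *prefer)
{
	uint32_t best_prio = prefer ? prefer->priority : UINT32_MAX;
	size_t i;

	for (i = 0; i < n; i++) {
		const struct pol *pol = &chain[i];

		if (pol->priority > best_prio)
			break;          /* chain is priority-ordered: give up */
		if (!pol->match)
			continue;       /* corresponds to -ESRCH */
		if (prefer && pol->priority == best_prio &&
		    prefer->pos < pol->pos)
			return prefer;  /* tie: the older policy wins */
		return pol;
	}
	return prefer;
}

int main(void)
{
	/* Two "chains" as xfrm_policy_find_inexact_candidates() would
	 * produce, e.g. the any-address chain and the daddr chain. */
	const struct pol chain_any[] = {
		{ "any/prio10/new", 10, 7, 1 },
	};
	const struct pol chain_daddr[] = {
		{ "daddr/prio10/old", 10, 3, 1 },
		{ "daddr/prio20",     20, 5, 1 },
	};
	const struct pol *best = NULL;

	best = eval_chain(chain_any, sizeof(chain_any) / sizeof(chain_any[0]), best);
	best = eval_chain(chain_daddr, sizeof(chain_daddr) / sizeof(chain_daddr[0]), best);

	/* Both prio-10 policies match; the one inserted earlier (pos 3) wins. */
	printf("selected: %s\n", best ? best->name : "(none)");
	return 0;
}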
// SPDX-License-Identifier: GPL-2.0-or-later #include <linux/mrp_bridge.h> #include "br_private_mrp.h" static const u8 mrp_test_dmac[ETH_ALEN] = { 0x1, 0x15, 0x4e, 0x0, 0x0, 0x1 }; static const u8 mrp_in_test_dmac[ETH_ALEN] = { 0x1, 0x15, 0x4e, 0x0, 0x0, 0x3 }; static int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb); static struct br_frame_type mrp_frame_type __read_mostly = { .type = cpu_to_be16(ETH_P_MRP), .frame_handler = br_mrp_process, }; static bool br_mrp_is_ring_port(struct net_bridge_port *p_port, struct net_bridge_port *s_port, struct net_bridge_port *port) { if (port == p_port || port == s_port) return true; return false; } static bool br_mrp_is_in_port(struct net_bridge_port *i_port, struct net_bridge_port *port) { if (port == i_port) return true; return false; } static struct net_bridge_port *br_mrp_get_port(struct net_bridge *br, u32 ifindex) { struct net_bridge_port *res = NULL; struct net_bridge_port *port; list_for_each_entry(port, &br->port_list, list) { if (port->dev->ifindex == ifindex) { res = port; break; } } return res; } static struct br_mrp *br_mrp_find_id(struct net_bridge *br, u32 ring_id) { struct br_mrp *res = NULL; struct br_mrp *mrp; hlist_for_each_entry_rcu(mrp, &br->mrp_list, list, lockdep_rtnl_is_held()) { if (mrp->ring_id == ring_id) { res = mrp; break; } } return res; } static struct br_mrp *br_mrp_find_in_id(struct net_bridge *br, u32 in_id) { struct br_mrp *res = NULL; struct br_mrp *mrp; hlist_for_each_entry_rcu(mrp, &br->mrp_list, list, lockdep_rtnl_is_held()) { if (mrp->in_id == in_id) { res = mrp; break; } } return res; } static bool br_mrp_unique_ifindex(struct net_bridge *br, u32 ifindex) { struct br_mrp *mrp; hlist_for_each_entry_rcu(mrp, &br->mrp_list, list, lockdep_rtnl_is_held()) { struct net_bridge_port *p; p = rtnl_dereference(mrp->p_port); if (p && p->dev->ifindex == ifindex) return false; p = 
rtnl_dereference(mrp->s_port); if (p && p->dev->ifindex == ifindex) return false; p = rtnl_dereference(mrp->i_port); if (p && p->dev->ifindex == ifindex) return false; } return true; } static struct br_mrp *br_mrp_find_port(struct net_bridge *br, struct net_bridge_port *p) { struct br_mrp *res = NULL; struct br_mrp *mrp; hlist_for_each_entry_rcu(mrp, &br->mrp_list, list, lockdep_rtnl_is_held()) { if (rcu_access_pointer(mrp->p_port) == p || rcu_access_pointer(mrp->s_port) == p || rcu_access_pointer(mrp->i_port) == p) { res = mrp; break; } } return res; } static int br_mrp_next_seq(struct br_mrp *mrp) { mrp->seq_id++; return mrp->seq_id; } static struct sk_buff *br_mrp_skb_alloc(struct net_bridge_port *p, const u8 *src, const u8 *dst) { struct ethhdr *eth_hdr; struct sk_buff *skb; __be16 *version; skb = dev_alloc_skb(MRP_MAX_FRAME_LENGTH); if (!skb) return NULL; skb->dev = p->dev; skb->protocol = htons(ETH_P_MRP); skb->priority = MRP_FRAME_PRIO; skb_reserve(skb, sizeof(*eth_hdr)); eth_hdr = skb_push(skb, sizeof(*eth_hdr)); ether_addr_copy(eth_hdr->h_dest, dst); ether_addr_copy(eth_hdr->h_source, src); eth_hdr->h_proto = htons(ETH_P_MRP); version = skb_put(skb, sizeof(*version)); *version = cpu_to_be16(MRP_VERSION); return skb; } static void br_mrp_skb_tlv(struct sk_buff *skb, enum br_mrp_tlv_header_type type, u8 length) { struct br_mrp_tlv_hdr *hdr; hdr = skb_put(skb, sizeof(*hdr)); hdr->type = type; hdr->length = length; } static void br_mrp_skb_common(struct sk_buff *skb, struct br_mrp *mrp) { struct br_mrp_common_hdr *hdr; br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_COMMON, sizeof(*hdr)); hdr = skb_put(skb, sizeof(*hdr)); hdr->seq_id = cpu_to_be16(br_mrp_next_seq(mrp)); memset(hdr->domain, 0xff, MRP_DOMAIN_UUID_LENGTH); } static struct sk_buff *br_mrp_alloc_test_skb(struct br_mrp *mrp, struct net_bridge_port *p, enum br_mrp_port_role_type port_role) { struct br_mrp_ring_test_hdr *hdr = NULL; struct sk_buff *skb = NULL; if (!p) return NULL; skb = br_mrp_skb_alloc(p, p->dev->dev_addr, mrp_test_dmac); if (!skb) return NULL; br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_RING_TEST, sizeof(*hdr)); hdr = skb_put(skb, sizeof(*hdr)); hdr->prio = cpu_to_be16(mrp->prio); ether_addr_copy(hdr->sa, p->br->dev->dev_addr); hdr->port_role = cpu_to_be16(port_role); hdr->state = cpu_to_be16(mrp->ring_state); hdr->transitions = cpu_to_be16(mrp->ring_transitions); hdr->timestamp = cpu_to_be32(jiffies_to_msecs(jiffies)); br_mrp_skb_common(skb, mrp); /* In case the node behaves as MRA then the Test frame needs to have * an Option TLV which includes eventually a sub-option TLV that has * the type AUTO_MGR */ if (mrp->ring_role == BR_MRP_RING_ROLE_MRA) { struct br_mrp_sub_option1_hdr *sub_opt = NULL; struct br_mrp_tlv_hdr *sub_tlv = NULL; struct br_mrp_oui_hdr *oui = NULL; u8 length; length = sizeof(*sub_opt) + sizeof(*sub_tlv) + sizeof(oui) + MRP_OPT_PADDING; br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_OPTION, length); oui = skb_put(skb, sizeof(*oui)); memset(oui, 0x0, sizeof(*oui)); sub_opt = skb_put(skb, sizeof(*sub_opt)); memset(sub_opt, 0x0, sizeof(*sub_opt)); sub_tlv = skb_put(skb, sizeof(*sub_tlv)); sub_tlv->type = BR_MRP_SUB_TLV_HEADER_TEST_AUTO_MGR; /* 32 bit alligment shall be ensured therefore add 2 bytes */ skb_put(skb, MRP_OPT_PADDING); } br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_END, 0x0); return skb; } static struct sk_buff *br_mrp_alloc_in_test_skb(struct br_mrp *mrp, struct net_bridge_port *p, enum br_mrp_port_role_type port_role) { struct br_mrp_in_test_hdr *hdr = NULL; struct sk_buff *skb = NULL; if (!p) return NULL; 
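/* What follows builds the MRP_InTest PDU: br_mrp_skb_alloc() writes the
 * Ethernet header and the 16-bit MRP version, then an IN_TEST TLV is added
 * carrying the in_id, the bridge MAC, the port role, the interconnect state,
 * the transition count and a timestamp, followed by the common TLV
 * (sequence number + domain) and a closing END TLV.
 */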
skb = br_mrp_skb_alloc(p, p->dev->dev_addr, mrp_in_test_dmac); if (!skb) return NULL; br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_IN_TEST, sizeof(*hdr)); hdr = skb_put(skb, sizeof(*hdr)); hdr->id = cpu_to_be16(mrp->in_id); ether_addr_copy(hdr->sa, p->br->dev->dev_addr); hdr->port_role = cpu_to_be16(port_role); hdr->state = cpu_to_be16(mrp->in_state); hdr->transitions = cpu_to_be16(mrp->in_transitions); hdr->timestamp = cpu_to_be32(jiffies_to_msecs(jiffies)); br_mrp_skb_common(skb, mrp); br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_END, 0x0); return skb; } /* This function is continuously called in the following cases: * - when node role is MRM, in this case test_monitor is always set to false * because it needs to notify the userspace that the ring is open and needs to * send MRP_Test frames * - when node role is MRA, there are 2 subcases: * - when MRA behaves as MRM, in this case is similar with MRM role * - when MRA behaves as MRC, in this case test_monitor is set to true, * because it needs to detect when it stops seeing MRP_Test frames * from MRM node but it doesn't need to send MRP_Test frames. */ static void br_mrp_test_work_expired(struct work_struct *work) { struct delayed_work *del_work = to_delayed_work(work); struct br_mrp *mrp = container_of(del_work, struct br_mrp, test_work); struct net_bridge_port *p; bool notify_open = false; struct sk_buff *skb; if (time_before_eq(mrp->test_end, jiffies)) return; if (mrp->test_count_miss < mrp->test_max_miss) { mrp->test_count_miss++; } else { /* Notify that the ring is open only if the ring state is * closed, otherwise it would continue to notify at every * interval. * Also notify that the ring is open when the node has the * role MRA and behaves as MRC. The reason is that the * userspace needs to know when the MRM stopped sending * MRP_Test frames so that the current node to try to take * the role of a MRM. */ if (mrp->ring_state == BR_MRP_RING_STATE_CLOSED || mrp->test_monitor) notify_open = true; } rcu_read_lock(); p = rcu_dereference(mrp->p_port); if (p) { if (!mrp->test_monitor) { skb = br_mrp_alloc_test_skb(mrp, p, BR_MRP_PORT_ROLE_PRIMARY); if (!skb) goto out; skb_reset_network_header(skb); dev_queue_xmit(skb); } if (notify_open && !mrp->ring_role_offloaded) br_mrp_ring_port_open(p->dev, true); } p = rcu_dereference(mrp->s_port); if (p) { if (!mrp->test_monitor) { skb = br_mrp_alloc_test_skb(mrp, p, BR_MRP_PORT_ROLE_SECONDARY); if (!skb) goto out; skb_reset_network_header(skb); dev_queue_xmit(skb); } if (notify_open && !mrp->ring_role_offloaded) br_mrp_ring_port_open(p->dev, true); } out: rcu_read_unlock(); queue_delayed_work(system_percpu_wq, &mrp->test_work, usecs_to_jiffies(mrp->test_interval)); } /* This function is continuously called when the node has the interconnect role * MIM. It would generate interconnect test frames and will send them on all 3 * ports. But will also check if it stop receiving interconnect test frames. */ static void br_mrp_in_test_work_expired(struct work_struct *work) { struct delayed_work *del_work = to_delayed_work(work); struct br_mrp *mrp = container_of(del_work, struct br_mrp, in_test_work); struct net_bridge_port *p; bool notify_open = false; struct sk_buff *skb; if (time_before_eq(mrp->in_test_end, jiffies)) return; if (mrp->in_test_count_miss < mrp->in_test_max_miss) { mrp->in_test_count_miss++; } else { /* Notify that the interconnect ring is open only if the * interconnect ring state is closed, otherwise it would * continue to notify at every interval. 
*/ if (mrp->in_state == BR_MRP_IN_STATE_CLOSED) notify_open = true; } rcu_read_lock(); p = rcu_dereference(mrp->p_port); if (p) { skb = br_mrp_alloc_in_test_skb(mrp, p, BR_MRP_PORT_ROLE_PRIMARY); if (!skb) goto out; skb_reset_network_header(skb); dev_queue_xmit(skb); if (notify_open && !mrp->in_role_offloaded) br_mrp_in_port_open(p->dev, true); } p = rcu_dereference(mrp->s_port); if (p) { skb = br_mrp_alloc_in_test_skb(mrp, p, BR_MRP_PORT_ROLE_SECONDARY); if (!skb) goto out; skb_reset_network_header(skb); dev_queue_xmit(skb); if (notify_open && !mrp->in_role_offloaded) br_mrp_in_port_open(p->dev, true); } p = rcu_dereference(mrp->i_port); if (p) { skb = br_mrp_alloc_in_test_skb(mrp, p, BR_MRP_PORT_ROLE_INTER); if (!skb) goto out; skb_reset_network_header(skb); dev_queue_xmit(skb); if (notify_open && !mrp->in_role_offloaded) br_mrp_in_port_open(p->dev, true); } out: rcu_read_unlock(); queue_delayed_work(system_percpu_wq, &mrp->in_test_work, usecs_to_jiffies(mrp->in_test_interval)); } /* Deletes the MRP instance. * note: called under rtnl_lock */ static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp) { struct net_bridge_port *p; u8 state; /* Stop sending MRP_Test frames */ cancel_delayed_work_sync(&mrp->test_work); br_mrp_switchdev_send_ring_test(br, mrp, 0, 0, 0, 0); /* Stop sending MRP_InTest frames if has an interconnect role */ cancel_delayed_work_sync(&mrp->in_test_work); br_mrp_switchdev_send_in_test(br, mrp, 0, 0, 0); /* Disable the roles */ br_mrp_switchdev_set_ring_role(br, mrp, BR_MRP_RING_ROLE_DISABLED); p = rtnl_dereference(mrp->i_port); if (p) br_mrp_switchdev_set_in_role(br, mrp, mrp->in_id, mrp->ring_id, BR_MRP_IN_ROLE_DISABLED); br_mrp_switchdev_del(br, mrp); /* Reset the ports */ p = rtnl_dereference(mrp->p_port); if (p) { spin_lock_bh(&br->lock); state = netif_running(br->dev) ? BR_STATE_FORWARDING : BR_STATE_DISABLED; p->state = state; p->flags &= ~BR_MRP_AWARE; spin_unlock_bh(&br->lock); br_mrp_port_switchdev_set_state(p, state); rcu_assign_pointer(mrp->p_port, NULL); } p = rtnl_dereference(mrp->s_port); if (p) { spin_lock_bh(&br->lock); state = netif_running(br->dev) ? BR_STATE_FORWARDING : BR_STATE_DISABLED; p->state = state; p->flags &= ~BR_MRP_AWARE; spin_unlock_bh(&br->lock); br_mrp_port_switchdev_set_state(p, state); rcu_assign_pointer(mrp->s_port, NULL); } p = rtnl_dereference(mrp->i_port); if (p) { spin_lock_bh(&br->lock); state = netif_running(br->dev) ? BR_STATE_FORWARDING : BR_STATE_DISABLED; p->state = state; p->flags &= ~BR_MRP_AWARE; spin_unlock_bh(&br->lock); br_mrp_port_switchdev_set_state(p, state); rcu_assign_pointer(mrp->i_port, NULL); } hlist_del_rcu(&mrp->list); kfree_rcu(mrp, rcu); if (hlist_empty(&br->mrp_list)) br_del_frame(br, &mrp_frame_type); } /* Adds a new MRP instance. 
* note: called under rtnl_lock */ int br_mrp_add(struct net_bridge *br, struct br_mrp_instance *instance) { struct net_bridge_port *p; struct br_mrp *mrp; int err; /* If the ring exists, it is not possible to create another one with the * same ring_id */ mrp = br_mrp_find_id(br, instance->ring_id); if (mrp) return -EINVAL; if (!br_mrp_get_port(br, instance->p_ifindex) || !br_mrp_get_port(br, instance->s_ifindex)) return -EINVAL; /* It is not possible to have the same port part of multiple rings */ if (!br_mrp_unique_ifindex(br, instance->p_ifindex) || !br_mrp_unique_ifindex(br, instance->s_ifindex)) return -EINVAL; mrp = kzalloc(sizeof(*mrp), GFP_KERNEL); if (!mrp) return -ENOMEM; mrp->ring_id = instance->ring_id; mrp->prio = instance->prio; p = br_mrp_get_port(br, instance->p_ifindex); spin_lock_bh(&br->lock); p->state = BR_STATE_FORWARDING; p->flags |= BR_MRP_AWARE; spin_unlock_bh(&br->lock); rcu_assign_pointer(mrp->p_port, p); p = br_mrp_get_port(br, instance->s_ifindex); spin_lock_bh(&br->lock); p->state = BR_STATE_FORWARDING; p->flags |= BR_MRP_AWARE; spin_unlock_bh(&br->lock); rcu_assign_pointer(mrp->s_port, p); if (hlist_empty(&br->mrp_list)) br_add_frame(br, &mrp_frame_type); INIT_DELAYED_WORK(&mrp->test_work, br_mrp_test_work_expired); INIT_DELAYED_WORK(&mrp->in_test_work, br_mrp_in_test_work_expired); hlist_add_tail_rcu(&mrp->list, &br->mrp_list); err = br_mrp_switchdev_add(br, mrp); if (err) goto delete_mrp; return 0; delete_mrp: br_mrp_del_impl(br, mrp); return err; } /* Deletes the MRP instance from which the port is part of * note: called under rtnl_lock */ void br_mrp_port_del(struct net_bridge *br, struct net_bridge_port *p) { struct br_mrp *mrp = br_mrp_find_port(br, p); /* If the port is not part of a MRP instance just bail out */ if (!mrp) return; br_mrp_del_impl(br, mrp); } /* Deletes existing MRP instance based on ring_id * note: called under rtnl_lock */ int br_mrp_del(struct net_bridge *br, struct br_mrp_instance *instance) { struct br_mrp *mrp = br_mrp_find_id(br, instance->ring_id); if (!mrp) return -EINVAL; br_mrp_del_impl(br, mrp); return 0; } /* Set port state, port state can be forwarding, blocked or disabled * note: already called with rtnl_lock */ int br_mrp_set_port_state(struct net_bridge_port *p, enum br_mrp_port_state_type state) { u32 port_state; if (!p || !(p->flags & BR_MRP_AWARE)) return -EINVAL; spin_lock_bh(&p->br->lock); if (state == BR_MRP_PORT_STATE_FORWARDING) port_state = BR_STATE_FORWARDING; else port_state = BR_STATE_BLOCKING; p->state = port_state; spin_unlock_bh(&p->br->lock); br_mrp_port_switchdev_set_state(p, port_state); return 0; } /* Set port role, port role can be primary or secondary * note: already called with rtnl_lock */ int br_mrp_set_port_role(struct net_bridge_port *p, enum br_mrp_port_role_type role) { struct br_mrp *mrp; if (!p || !(p->flags & BR_MRP_AWARE)) return -EINVAL; mrp = br_mrp_find_port(p->br, p); if (!mrp) return -EINVAL; switch (role) { case BR_MRP_PORT_ROLE_PRIMARY: rcu_assign_pointer(mrp->p_port, p); break; case BR_MRP_PORT_ROLE_SECONDARY: rcu_assign_pointer(mrp->s_port, p); break; default: return -EINVAL; } br_mrp_port_switchdev_set_role(p, role); return 0; } /* Set ring state, ring state can be only Open or Closed * note: already called with rtnl_lock */ int br_mrp_set_ring_state(struct net_bridge *br, struct br_mrp_ring_state *state) { struct br_mrp *mrp = br_mrp_find_id(br, state->ring_id); if (!mrp) return -EINVAL; if (mrp->ring_state != state->ring_state) mrp->ring_transitions++; mrp->ring_state = 
state->ring_state; br_mrp_switchdev_set_ring_state(br, mrp, state->ring_state); return 0; } /* Set ring role, ring role can be only MRM(Media Redundancy Manager) or * MRC(Media Redundancy Client). * note: already called with rtnl_lock */ int br_mrp_set_ring_role(struct net_bridge *br, struct br_mrp_ring_role *role) { struct br_mrp *mrp = br_mrp_find_id(br, role->ring_id); enum br_mrp_hw_support support; if (!mrp) return -EINVAL; mrp->ring_role = role->ring_role; /* If there is an error just bailed out */ support = br_mrp_switchdev_set_ring_role(br, mrp, role->ring_role); if (support == BR_MRP_NONE) return -EOPNOTSUPP; /* Now detect if the HW actually applied the role or not. If the HW * applied the role it means that the SW will not to do those operations * anymore. For example if the role ir MRM then the HW will notify the * SW when ring is open, but if the is not pushed to the HW the SW will * need to detect when the ring is open */ mrp->ring_role_offloaded = support == BR_MRP_SW ? 0 : 1; return 0; } /* Start to generate or monitor MRP test frames, the frames are generated by * HW and if it fails, they are generated by the SW. * note: already called with rtnl_lock */ int br_mrp_start_test(struct net_bridge *br, struct br_mrp_start_test *test) { struct br_mrp *mrp = br_mrp_find_id(br, test->ring_id); enum br_mrp_hw_support support; if (!mrp) return -EINVAL; /* Try to push it to the HW and if it fails then continue with SW * implementation and if that also fails then return error. */ support = br_mrp_switchdev_send_ring_test(br, mrp, test->interval, test->max_miss, test->period, test->monitor); if (support == BR_MRP_NONE) return -EOPNOTSUPP; if (support == BR_MRP_HW) return 0; mrp->test_interval = test->interval; mrp->test_end = jiffies + usecs_to_jiffies(test->period); mrp->test_max_miss = test->max_miss; mrp->test_monitor = test->monitor; mrp->test_count_miss = 0; queue_delayed_work(system_percpu_wq, &mrp->test_work, usecs_to_jiffies(test->interval)); return 0; } /* Set in state, int state can be only Open or Closed * note: already called with rtnl_lock */ int br_mrp_set_in_state(struct net_bridge *br, struct br_mrp_in_state *state) { struct br_mrp *mrp = br_mrp_find_in_id(br, state->in_id); if (!mrp) return -EINVAL; if (mrp->in_state != state->in_state) mrp->in_transitions++; mrp->in_state = state->in_state; br_mrp_switchdev_set_in_state(br, mrp, state->in_state); return 0; } /* Set in role, in role can be only MIM(Media Interconnection Manager) or * MIC(Media Interconnection Client). * note: already called with rtnl_lock */ int br_mrp_set_in_role(struct net_bridge *br, struct br_mrp_in_role *role) { struct br_mrp *mrp = br_mrp_find_id(br, role->ring_id); enum br_mrp_hw_support support; struct net_bridge_port *p; if (!mrp) return -EINVAL; if (!br_mrp_get_port(br, role->i_ifindex)) return -EINVAL; if (role->in_role == BR_MRP_IN_ROLE_DISABLED) { u8 state; /* It is not allowed to disable a port that doesn't exist */ p = rtnl_dereference(mrp->i_port); if (!p) return -EINVAL; /* Stop the generating MRP_InTest frames */ cancel_delayed_work_sync(&mrp->in_test_work); br_mrp_switchdev_send_in_test(br, mrp, 0, 0, 0); /* Remove the port */ spin_lock_bh(&br->lock); state = netif_running(br->dev) ? 
BR_STATE_FORWARDING : BR_STATE_DISABLED; p->state = state; p->flags &= ~BR_MRP_AWARE; spin_unlock_bh(&br->lock); br_mrp_port_switchdev_set_state(p, state); rcu_assign_pointer(mrp->i_port, NULL); mrp->in_role = role->in_role; mrp->in_id = 0; return 0; } /* It is not possible to have the same port part of multiple rings */ if (!br_mrp_unique_ifindex(br, role->i_ifindex)) return -EINVAL; /* It is not allowed to set a different interconnect port if the mrp * instance has already one. First it needs to be disabled and after * that set the new port */ if (rcu_access_pointer(mrp->i_port)) return -EINVAL; p = br_mrp_get_port(br, role->i_ifindex); spin_lock_bh(&br->lock); p->state = BR_STATE_FORWARDING; p->flags |= BR_MRP_AWARE; spin_unlock_bh(&br->lock); rcu_assign_pointer(mrp->i_port, p); mrp->in_role = role->in_role; mrp->in_id = role->in_id; /* If there is an error just bailed out */ support = br_mrp_switchdev_set_in_role(br, mrp, role->in_id, role->ring_id, role->in_role); if (support == BR_MRP_NONE) return -EOPNOTSUPP; /* Now detect if the HW actually applied the role or not. If the HW * applied the role it means that the SW will not to do those operations * anymore. For example if the role is MIM then the HW will notify the * SW when interconnect ring is open, but if the is not pushed to the HW * the SW will need to detect when the interconnect ring is open. */ mrp->in_role_offloaded = support == BR_MRP_SW ? 0 : 1; return 0; } /* Start to generate MRP_InTest frames, the frames are generated by * HW and if it fails, they are generated by the SW. * note: already called with rtnl_lock */ int br_mrp_start_in_test(struct net_bridge *br, struct br_mrp_start_in_test *in_test) { struct br_mrp *mrp = br_mrp_find_in_id(br, in_test->in_id); enum br_mrp_hw_support support; if (!mrp) return -EINVAL; if (mrp->in_role != BR_MRP_IN_ROLE_MIM) return -EINVAL; /* Try to push it to the HW and if it fails then continue with SW * implementation and if that also fails then return error. */ support = br_mrp_switchdev_send_in_test(br, mrp, in_test->interval, in_test->max_miss, in_test->period); if (support == BR_MRP_NONE) return -EOPNOTSUPP; if (support == BR_MRP_HW) return 0; mrp->in_test_interval = in_test->interval; mrp->in_test_end = jiffies + usecs_to_jiffies(in_test->period); mrp->in_test_max_miss = in_test->max_miss; mrp->in_test_count_miss = 0; queue_delayed_work(system_percpu_wq, &mrp->in_test_work, usecs_to_jiffies(in_test->interval)); return 0; } /* Determine if the frame type is a ring frame */ static bool br_mrp_ring_frame(struct sk_buff *skb) { const struct br_mrp_tlv_hdr *hdr; struct br_mrp_tlv_hdr _hdr; hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr); if (!hdr) return false; if (hdr->type == BR_MRP_TLV_HEADER_RING_TEST || hdr->type == BR_MRP_TLV_HEADER_RING_TOPO || hdr->type == BR_MRP_TLV_HEADER_RING_LINK_DOWN || hdr->type == BR_MRP_TLV_HEADER_RING_LINK_UP || hdr->type == BR_MRP_TLV_HEADER_OPTION) return true; return false; } /* Determine if the frame type is an interconnect frame */ static bool br_mrp_in_frame(struct sk_buff *skb) { const struct br_mrp_tlv_hdr *hdr; struct br_mrp_tlv_hdr _hdr; hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr); if (!hdr) return false; if (hdr->type == BR_MRP_TLV_HEADER_IN_TEST || hdr->type == BR_MRP_TLV_HEADER_IN_TOPO || hdr->type == BR_MRP_TLV_HEADER_IN_LINK_DOWN || hdr->type == BR_MRP_TLV_HEADER_IN_LINK_UP || hdr->type == BR_MRP_TLV_HEADER_IN_LINK_STATUS) return true; return false; } /* Process only MRP Test frame. 
All the other MRP frames are processed by * userspace application * note: already called with rcu_read_lock */ static void br_mrp_mrm_process(struct br_mrp *mrp, struct net_bridge_port *port, struct sk_buff *skb) { const struct br_mrp_tlv_hdr *hdr; struct br_mrp_tlv_hdr _hdr; /* Each MRP header starts with a version field which is 16 bits. * Therefore skip the version and get directly the TLV header. */ hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr); if (!hdr) return; if (hdr->type != BR_MRP_TLV_HEADER_RING_TEST) return; mrp->test_count_miss = 0; /* Notify the userspace that the ring is closed only when the ring is * not closed */ if (mrp->ring_state != BR_MRP_RING_STATE_CLOSED) br_mrp_ring_port_open(port->dev, false); } /* Determine if the test hdr has a better priority than the node */ static bool br_mrp_test_better_than_own(struct br_mrp *mrp, struct net_bridge *br, const struct br_mrp_ring_test_hdr *hdr) { u16 prio = be16_to_cpu(hdr->prio); if (prio < mrp->prio || (prio == mrp->prio && ether_addr_to_u64(hdr->sa) < ether_addr_to_u64(br->dev->dev_addr))) return true; return false; } /* Process only MRP Test frame. All the other MRP frames are processed by * userspace application * note: already called with rcu_read_lock */ static void br_mrp_mra_process(struct br_mrp *mrp, struct net_bridge *br, struct net_bridge_port *port, struct sk_buff *skb) { const struct br_mrp_ring_test_hdr *test_hdr; struct br_mrp_ring_test_hdr _test_hdr; const struct br_mrp_tlv_hdr *hdr; struct br_mrp_tlv_hdr _hdr; /* Each MRP header starts with a version field which is 16 bits. * Therefore skip the version and get directly the TLV header. */ hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr); if (!hdr) return; if (hdr->type != BR_MRP_TLV_HEADER_RING_TEST) return; test_hdr = skb_header_pointer(skb, sizeof(uint16_t) + sizeof(_hdr), sizeof(_test_hdr), &_test_hdr); if (!test_hdr) return; /* Only frames that have a better priority than the node will * clear the miss counter because otherwise the node will need to behave * as MRM. */ if (br_mrp_test_better_than_own(mrp, br, test_hdr)) mrp->test_count_miss = 0; } /* Process only MRP InTest frame. All the other MRP frames are processed by * userspace application * note: already called with rcu_read_lock */ static bool br_mrp_mim_process(struct br_mrp *mrp, struct net_bridge_port *port, struct sk_buff *skb) { const struct br_mrp_in_test_hdr *in_hdr; struct br_mrp_in_test_hdr _in_hdr; const struct br_mrp_tlv_hdr *hdr; struct br_mrp_tlv_hdr _hdr; /* Each MRP header starts with a version field which is 16 bits. * Therefore skip the version and get directly the TLV header. */ hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr); if (!hdr) return false; /* The check for InTest frame type was already done */ in_hdr = skb_header_pointer(skb, sizeof(uint16_t) + sizeof(_hdr), sizeof(_in_hdr), &_in_hdr); if (!in_hdr) return false; /* It needs to process only it's own InTest frames. */ if (mrp->in_id != ntohs(in_hdr->id)) return false; mrp->in_test_count_miss = 0; /* Notify the userspace that the ring is closed only when the ring is * not closed */ if (mrp->in_state != BR_MRP_IN_STATE_CLOSED) br_mrp_in_port_open(port->dev, false); return true; } /* Get the MRP frame type * note: already called with rcu_read_lock */ static u8 br_mrp_get_frame_type(struct sk_buff *skb) { const struct br_mrp_tlv_hdr *hdr; struct br_mrp_tlv_hdr _hdr; /* Each MRP header starts with a version field which is 16 bits. 
* Therefore skip the version and get directly the TLV header. */ hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr); if (!hdr) return 0xff; return hdr->type; } static bool br_mrp_mrm_behaviour(struct br_mrp *mrp) { if (mrp->ring_role == BR_MRP_RING_ROLE_MRM || (mrp->ring_role == BR_MRP_RING_ROLE_MRA && !mrp->test_monitor)) return true; return false; } static bool br_mrp_mrc_behaviour(struct br_mrp *mrp) { if (mrp->ring_role == BR_MRP_RING_ROLE_MRC || (mrp->ring_role == BR_MRP_RING_ROLE_MRA && mrp->test_monitor)) return true; return false; } /* This will just forward the frame to the other mrp ring ports, depending on * the frame type, ring role and interconnect role * note: already called with rcu_read_lock */ static int br_mrp_rcv(struct net_bridge_port *p, struct sk_buff *skb, struct net_device *dev) { struct net_bridge_port *p_port, *s_port, *i_port = NULL; struct net_bridge_port *p_dst, *s_dst, *i_dst = NULL; struct net_bridge *br; struct br_mrp *mrp; /* If port is disabled don't accept any frames */ if (p->state == BR_STATE_DISABLED) return 0; br = p->br; mrp = br_mrp_find_port(br, p); if (unlikely(!mrp)) return 0; p_port = rcu_dereference(mrp->p_port); if (!p_port) return 0; p_dst = p_port; s_port = rcu_dereference(mrp->s_port); if (!s_port) return 0; s_dst = s_port; /* If the frame is a ring frame then it is not required to check the * interconnect role and ports to process or forward the frame */ if (br_mrp_ring_frame(skb)) { /* If the role is MRM then don't forward the frames */ if (mrp->ring_role == BR_MRP_RING_ROLE_MRM) { br_mrp_mrm_process(mrp, p, skb); goto no_forward; } /* If the role is MRA then don't forward the frames if it * behaves as MRM node */ if (mrp->ring_role == BR_MRP_RING_ROLE_MRA) { if (!mrp->test_monitor) { br_mrp_mrm_process(mrp, p, skb); goto no_forward; } br_mrp_mra_process(mrp, br, p, skb); } goto forward; } if (br_mrp_in_frame(skb)) { u8 in_type = br_mrp_get_frame_type(skb); i_port = rcu_dereference(mrp->i_port); i_dst = i_port; /* If the ring port is in block state it should not forward * In_Test frames */ if (br_mrp_is_ring_port(p_port, s_port, p) && p->state == BR_STATE_BLOCKING && in_type == BR_MRP_TLV_HEADER_IN_TEST) goto no_forward; /* Nodes that behaves as MRM needs to stop forwarding the * frames in case the ring is closed, otherwise will be a loop. * In this case the frame is no forward between the ring ports. 
*/ if (br_mrp_mrm_behaviour(mrp) && br_mrp_is_ring_port(p_port, s_port, p) && (s_port->state != BR_STATE_FORWARDING || p_port->state != BR_STATE_FORWARDING)) { p_dst = NULL; s_dst = NULL; } /* A node that behaves as MRC and doesn't have a interconnect * role then it should forward all frames between the ring ports * because it doesn't have an interconnect port */ if (br_mrp_mrc_behaviour(mrp) && mrp->in_role == BR_MRP_IN_ROLE_DISABLED) goto forward; if (mrp->in_role == BR_MRP_IN_ROLE_MIM) { if (in_type == BR_MRP_TLV_HEADER_IN_TEST) { /* MIM should not forward it's own InTest * frames */ if (br_mrp_mim_process(mrp, p, skb)) { goto no_forward; } else { if (br_mrp_is_ring_port(p_port, s_port, p)) i_dst = NULL; if (br_mrp_is_in_port(i_port, p)) goto no_forward; } } else { /* MIM should forward IntLinkChange/Status and * IntTopoChange between ring ports but MIM * should not forward IntLinkChange/Status and * IntTopoChange if the frame was received at * the interconnect port */ if (br_mrp_is_ring_port(p_port, s_port, p)) i_dst = NULL; if (br_mrp_is_in_port(i_port, p)) goto no_forward; } } if (mrp->in_role == BR_MRP_IN_ROLE_MIC) { /* MIC should forward InTest frames on all ports * regardless of the received port */ if (in_type == BR_MRP_TLV_HEADER_IN_TEST) goto forward; /* MIC should forward IntLinkChange frames only if they * are received on ring ports to all the ports */ if (br_mrp_is_ring_port(p_port, s_port, p) && (in_type == BR_MRP_TLV_HEADER_IN_LINK_UP || in_type == BR_MRP_TLV_HEADER_IN_LINK_DOWN)) goto forward; /* MIC should forward IntLinkStatus frames only to * interconnect port if it was received on a ring port. * If it is received on interconnect port then, it * should be forward on both ring ports */ if (br_mrp_is_ring_port(p_port, s_port, p) && in_type == BR_MRP_TLV_HEADER_IN_LINK_STATUS) { p_dst = NULL; s_dst = NULL; } /* Should forward the InTopo frames only between the * ring ports */ if (in_type == BR_MRP_TLV_HEADER_IN_TOPO) { i_dst = NULL; goto forward; } /* In all the other cases don't forward the frames */ goto no_forward; } } forward: if (p_dst) br_forward(p_dst, skb, true, false); if (s_dst) br_forward(s_dst, skb, true, false); if (i_dst) br_forward(i_dst, skb, true, false); no_forward: return 1; } /* Check if the frame was received on a port that is part of MRP ring * and if the frame has MRP eth. In that case process the frame otherwise do * normal forwarding. * note: already called with rcu_read_lock */ static int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb) { /* If there is no MRP instance do normal forwarding */ if (likely(!(p->flags & BR_MRP_AWARE))) goto out; return br_mrp_rcv(p, skb, p->dev); out: return 0; } bool br_mrp_enabled(struct net_bridge *br) { return !hlist_empty(&br->mrp_list); }
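The exported br_mrp_* calls above are normally driven in this order by the bridge netlink layer when userspace brings up a ring. A minimal sketch of configuring an MRM instance on two ports follows; the helper name and the numeric values (ring id, priority, test interval/period) are illustrative placeholders, not values taken from the source.

/* Hypothetical sketch: create an MRP instance, give the bridge the MRM
 * role and start generating MRP_Test frames. Values are placeholders.
 */
static int example_setup_mrm(struct net_bridge *br, u32 p_ifindex, u32 s_ifindex)
{
	struct br_mrp_instance inst = {
		.ring_id   = 1,
		.prio      = 0x8000,		/* ring manager priority (placeholder) */
		.p_ifindex = p_ifindex,
		.s_ifindex = s_ifindex,
	};
	struct br_mrp_ring_role role = {
		.ring_id   = 1,
		.ring_role = BR_MRP_RING_ROLE_MRM,
	};
	struct br_mrp_start_test test = {
		.ring_id  = 1,
		.interval = 20000,		/* usecs between MRP_Test frames */
		.max_miss = 3,			/* missed frames before notifying "ring open" */
		.period   = 1000000,		/* usecs until test generation stops */
		.monitor  = 0,			/* generate tests rather than only monitor */
	};
	int err;

	err = br_mrp_add(br, &inst);
	if (err)
		return err;

	err = br_mrp_set_ring_role(br, &role);
	if (err)
		goto out_del;

	err = br_mrp_start_test(br, &test);
	if (err)
		goto out_del;

	return 0;

out_del:
	br_mrp_del(br, &inst);
	return err;
}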
// SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for Holtek keyboard * Copyright (c) 2012 Tom Harwood */ /* */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/usb.h> #include "hid-ids.h" #include "usbhid/usbhid.h" /* Holtek based keyboards (USB ID 04d9:a055) have the following issues: * - The report descriptor specifies an excessively large number of consumer * usages (2^15), which is more than HID_MAX_USAGES. This prevents proper * parsing of the report descriptor. * - The report descriptor reports on caps/scroll/num lock key presses, but * doesn't have an LED output usage block. * * The replacement descriptor below fixes the number of consumer usages, * and provides an LED output usage block. LED output events are redirected * to the boot interface. */ static const __u8 holtek_kbd_rdesc_fixed[] = { /* Original report descriptor, with reduced number of consumer usages */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x80, /* Usage (Sys Control), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x01, /* Report ID (1), */ 0x19, 0x81, /* Usage Minimum (Sys Power Down), */ 0x29, 0x83, /* Usage Maximum (Sys Wake Up), */ 0x15, 0x00, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x95, 0x03, /* Report Count (3), */ 0x75, 0x01, /* Report Size (1), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x01, /* Report Count (1), */ 0x75, 0x05, /* Report Size (5), */ 0x81, 0x01, /* Input (Constant), */ 0xC0, /* End Collection, */ 0x05, 0x0C, /* Usage Page (Consumer), */ 0x09, 0x01, /* Usage (Consumer Control), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x02, /* Report ID (2), */ 0x19, 0x00, /* Usage Minimum (00h), */ 0x2A, 0xFF, 0x2F, /* Usage Maximum (0x2FFF), previously 0x7FFF */ 0x15, 0x00, /* Logical Minimum (0), */ 0x26, 0xFF, 0x2F, /* Logical Maximum (0x2FFF), previously 0x7FFF */ 0x95, 0x01, /* Report Count (1), */ 0x75, 0x10, /* Report Size (16), */ 0x81, 0x00, /* Input, */ 0xC0, /* End Collection, */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x06, /* Usage (Keyboard), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x03, /* Report ID (3), */ 0x95, 0x38, /* Report Count (56), */ 0x75, 0x01, /* Report Size (1), */ 0x15, 0x00, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x05, 0x07, /* Usage Page (Keyboard), */ 0x19, 0xE0, /* Usage Minimum (KB Leftcontrol), */ 0x29, 0xE7, /* Usage Maximum (KB Right GUI), */ 0x19, 0x00, /* Usage Minimum (None), */ 0x29, 0x2F, /* Usage Maximum (KB Lboxbracket And Lbrace),*/ 0x81, 0x02, /* Input (Variable), */ 0xC0, /* End Collection, */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x06, /* Usage (Keyboard), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x04, /* Report ID (4), */ 0x95, 0x38, /* Report Count (56), */ 0x75, 0x01, /* Report Size (1), */ 0x15, 0x00, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum 
(1), */ 0x05, 0x07, /* Usage Page (Keyboard), */ 0x19, 0x30, /* Usage Minimum (KB Rboxbracket And Rbrace),*/ 0x29, 0x67, /* Usage Maximum (KP Equals), */ 0x81, 0x02, /* Input (Variable), */ 0xC0, /* End Collection */ /* LED usage for the boot protocol interface */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x06, /* Usage (Keyboard), */ 0xA1, 0x01, /* Collection (Application), */ 0x05, 0x08, /* Usage Page (LED), */ 0x19, 0x01, /* Usage Minimum (01h), */ 0x29, 0x03, /* Usage Maximum (03h), */ 0x15, 0x00, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x03, /* Report Count (3), */ 0x91, 0x02, /* Output (Variable), */ 0x95, 0x05, /* Report Count (5), */ 0x91, 0x01, /* Output (Constant), */ 0xC0, /* End Collection */ }; static const __u8 *holtek_kbd_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); if (intf->cur_altsetting->desc.bInterfaceNumber == 1) { *rsize = sizeof(holtek_kbd_rdesc_fixed); return holtek_kbd_rdesc_fixed; } return rdesc; } static int holtek_kbd_input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { struct hid_device *hid = input_get_drvdata(dev); struct usb_device *usb_dev = hid_to_usb_dev(hid); /* Locate the boot interface, to receive the LED change events */ struct usb_interface *boot_interface = usb_ifnum_to_if(usb_dev, 0); struct hid_device *boot_hid; struct hid_input *boot_hid_input; if (unlikely(boot_interface == NULL)) return -ENODEV; boot_hid = usb_get_intfdata(boot_interface); if (list_empty(&boot_hid->inputs)) { hid_err(hid, "no inputs found\n"); return -ENODEV; } boot_hid_input = list_first_entry(&boot_hid->inputs, struct hid_input, list); return boot_hid_input->input->event(boot_hid_input->input, type, code, value); } static int holtek_kbd_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct usb_interface *intf; int ret; if (!hid_is_usb(hdev)) return -EINVAL; ret = hid_parse(hdev); if (!ret) ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); intf = to_usb_interface(hdev->dev.parent); if (!ret && intf->cur_altsetting->desc.bInterfaceNumber == 1) { struct hid_input *hidinput; list_for_each_entry(hidinput, &hdev->inputs, list) { hidinput->input->event = holtek_kbd_input_event; } } return ret; } static const struct hid_device_id holtek_kbd_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD) }, { } }; MODULE_DEVICE_TABLE(hid, holtek_kbd_devices); static struct hid_driver holtek_kbd_driver = { .name = "holtek_kbd", .id_table = holtek_kbd_devices, .report_fixup = holtek_kbd_report_fixup, .probe = holtek_kbd_probe }; module_hid_driver(holtek_kbd_driver); MODULE_DESCRIPTION("HID driver for Holtek keyboard"); MODULE_LICENSE("GPL");
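The two items patched relative to the device's original descriptor are HID short items with two-byte little-endian payloads: 0x2A (Usage Maximum, 2-byte data) and 0x26 (Logical Maximum, 2-byte data), so the byte pair 0xFF, 0x2F encodes 0x2FFF. The sketch below is a hypothetical helper (not part of this driver) showing how such a payload could be rewritten in place; this driver ships a full replacement descriptor instead because it also needs to add the LED output block.

/* Hypothetical helper: overwrite the 16-bit payload of a HID short item
 * at a known offset in a report descriptor (payloads are little-endian).
 */
static void example_patch_le16(__u8 *rdesc, unsigned int off, __u16 val)
{
	rdesc[off]     = val & 0xff;	/* low byte  */
	rdesc[off + 1] = val >> 8;	/* high byte */
}

A report_fixup callback could call example_patch_le16(rdesc, off, 0x2FFF) on the Usage Maximum and Logical Maximum payload offsets to achieve the same clamping as the first part of holtek_kbd_rdesc_fixed.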
// SPDX-License-Identifier: GPL-2.0 /* * RTC subsystem, interface functions * * Copyright (C) 2005 Tower Technologies * Author: Alessandro Zummo <a.zummo@towertech.it> * * based on arch/arm/common/rtctime.c */ #include <linux/rtc.h> #include <linux/sched.h> #include <linux/module.h> #include <linux/log2.h> #include <linux/workqueue.h> #define CREATE_TRACE_POINTS #include <trace/events/rtc.h> static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer); static void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer); static void rtc_add_offset(struct rtc_device *rtc, struct rtc_time *tm) { time64_t secs; if (!rtc->offset_secs) return; secs = rtc_tm_to_time64(tm); /* * Time values read from the RTC device are always within the RTC's * original valid range. If they fall inside the region where the * expanded range overlaps the original range, there is no need to * add the offset. */ if ((rtc->start_secs > rtc->range_min && secs >= rtc->start_secs) || (rtc->start_secs < rtc->range_min && secs <= (rtc->start_secs + rtc->range_max - rtc->range_min))) return; rtc_time64_to_tm(secs + rtc->offset_secs, tm); } static void rtc_subtract_offset(struct rtc_device *rtc, struct rtc_time *tm) { time64_t secs; if (!rtc->offset_secs) return; secs = rtc_tm_to_time64(tm); /* * If the time values being set are within the valid range of the RTC * hardware, there is no need to subtract the offset when writing them * to the device. Otherwise the offset must be subtracted so that the * values are valid for the RTC hardware. */ if (secs >= rtc->range_min && secs <= rtc->range_max) return; rtc_time64_to_tm(secs - rtc->offset_secs, tm); } static int rtc_valid_range(struct rtc_device *rtc, struct rtc_time *tm) { if (rtc->range_min != rtc->range_max) { time64_t time = rtc_tm_to_time64(tm); time64_t range_min = rtc->set_start_time ? rtc->start_secs : rtc->range_min; timeu64_t range_max = rtc->set_start_time ? 
(rtc->start_secs + rtc->range_max - rtc->range_min) : rtc->range_max; if (time < range_min || time > range_max) return -ERANGE; } return 0; } static int __rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm) { int err; if (!rtc->ops) { err = -ENODEV; } else if (!rtc->ops->read_time) { err = -EINVAL; } else { memset(tm, 0, sizeof(struct rtc_time)); err = rtc->ops->read_time(rtc->dev.parent, tm); if (err < 0) { dev_dbg(&rtc->dev, "read_time: fail to read: %d\n", err); return err; } rtc_add_offset(rtc, tm); err = rtc_valid_tm(tm); if (err < 0) dev_dbg(&rtc->dev, "read_time: rtc_time isn't valid\n"); } return err; } int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm) { int err; err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; err = __rtc_read_time(rtc, tm); mutex_unlock(&rtc->ops_lock); trace_rtc_read_time(rtc_tm_to_time64(tm), err); return err; } EXPORT_SYMBOL_GPL(rtc_read_time); int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm) { int err, uie; err = rtc_valid_tm(tm); if (err != 0) return err; err = rtc_valid_range(rtc, tm); if (err) return err; rtc_subtract_offset(rtc, tm); #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL uie = rtc->uie_rtctimer.enabled || rtc->uie_irq_active; #else uie = rtc->uie_rtctimer.enabled; #endif if (uie) { err = rtc_update_irq_enable(rtc, 0); if (err) return err; } err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; if (!rtc->ops) err = -ENODEV; else if (rtc->ops->set_time) err = rtc->ops->set_time(rtc->dev.parent, tm); else err = -EINVAL; pm_stay_awake(rtc->dev.parent); mutex_unlock(&rtc->ops_lock); /* A timer might have just expired */ schedule_work(&rtc->irqwork); if (uie) { err = rtc_update_irq_enable(rtc, 1); if (err) return err; } trace_rtc_set_time(rtc_tm_to_time64(tm), err); return err; } EXPORT_SYMBOL_GPL(rtc_set_time); static int rtc_read_alarm_internal(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { int err; err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; if (!rtc->ops) { err = -ENODEV; } else if (!test_bit(RTC_FEATURE_ALARM, rtc->features) || !rtc->ops->read_alarm) { err = -EINVAL; } else { alarm->enabled = 0; alarm->pending = 0; alarm->time.tm_sec = -1; alarm->time.tm_min = -1; alarm->time.tm_hour = -1; alarm->time.tm_mday = -1; alarm->time.tm_mon = -1; alarm->time.tm_year = -1; alarm->time.tm_wday = -1; alarm->time.tm_yday = -1; alarm->time.tm_isdst = -1; err = rtc->ops->read_alarm(rtc->dev.parent, alarm); } mutex_unlock(&rtc->ops_lock); trace_rtc_read_alarm(err?0:rtc_tm_to_time64(&alarm->time), err); return err; } int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { int err; struct rtc_time before, now; int first_time = 1; time64_t t_now, t_alm; enum { none, day, month, year } missing = none; unsigned int days; /* The lower level RTC driver may return -1 in some fields, * creating invalid alarm->time values, for reasons like: * * - The hardware may not be capable of filling them in; * many alarms match only on time-of-day fields, not * day/month/year calendar data. * * - Some hardware uses illegal values as "wildcard" match * values, which non-Linux firmware (like a BIOS) may try * to set up as e.g. "alarm 15 minutes after each hour". * Linux uses only oneshot alarms. * * When we see that here, we deal with it by using values from * a current RTC timestamp for any missing (-1) values. The * RTC driver prevents "periodic alarm" modes. 
* * But this can be racey, because some fields of the RTC timestamp * may have wrapped in the interval since we read the RTC alarm, * which would lead to us inserting inconsistent values in place * of the -1 fields. * * Reading the alarm and timestamp in the reverse sequence * would have the same race condition, and not solve the issue. * * So, we must first read the RTC timestamp, * then read the RTC alarm value, * and then read a second RTC timestamp. * * If any fields of the second timestamp have changed * when compared with the first timestamp, then we know * our timestamp may be inconsistent with that used by * the low-level rtc_read_alarm_internal() function. * * So, when the two timestamps disagree, we just loop and do * the process again to get a fully consistent set of values. * * This could all instead be done in the lower level driver, * but since more than one lower level RTC implementation needs it, * then it's probably best to do it here instead of there.. */ /* Get the "before" timestamp */ err = rtc_read_time(rtc, &before); if (err < 0) return err; do { if (!first_time) memcpy(&before, &now, sizeof(struct rtc_time)); first_time = 0; /* get the RTC alarm values, which may be incomplete */ err = rtc_read_alarm_internal(rtc, alarm); if (err) return err; /* full-function RTCs won't have such missing fields */ err = rtc_valid_tm(&alarm->time); if (!err) goto done; /* get the "after" timestamp, to detect wrapped fields */ err = rtc_read_time(rtc, &now); if (err < 0) return err; /* note that tm_sec is a "don't care" value here: */ } while (before.tm_min != now.tm_min || before.tm_hour != now.tm_hour || before.tm_mon != now.tm_mon || before.tm_year != now.tm_year); /* Fill in the missing alarm fields using the timestamp; we * know there's at least one since alarm->time is invalid. */ if (alarm->time.tm_sec == -1) alarm->time.tm_sec = now.tm_sec; if (alarm->time.tm_min == -1) alarm->time.tm_min = now.tm_min; if (alarm->time.tm_hour == -1) alarm->time.tm_hour = now.tm_hour; /* For simplicity, only support date rollover for now */ if (alarm->time.tm_mday < 1 || alarm->time.tm_mday > 31) { alarm->time.tm_mday = now.tm_mday; missing = day; } if ((unsigned int)alarm->time.tm_mon >= 12) { alarm->time.tm_mon = now.tm_mon; if (missing == none) missing = month; } if (alarm->time.tm_year == -1) { alarm->time.tm_year = now.tm_year; if (missing == none) missing = year; } /* Can't proceed if alarm is still invalid after replacing * missing fields. */ err = rtc_valid_tm(&alarm->time); if (err) goto done; /* with luck, no rollover is needed */ t_now = rtc_tm_to_time64(&now); t_alm = rtc_tm_to_time64(&alarm->time); if (t_now < t_alm) goto done; switch (missing) { /* 24 hour rollover ... if it's now 10am Monday, an alarm that * that will trigger at 5am will do so at 5am Tuesday, which * could also be in the next month or year. This is a common * case, especially for PCs. */ case day: dev_dbg(&rtc->dev, "alarm rollover: %s\n", "day"); t_alm += 24 * 60 * 60; rtc_time64_to_tm(t_alm, &alarm->time); break; /* Month rollover ... if it's the 31th, an alarm on the 3rd will * be next month. An alarm matching on the 30th, 29th, or 28th * may end up in the month after that! Many newer PCs support * this type of alarm. 
*/ case month: dev_dbg(&rtc->dev, "alarm rollover: %s\n", "month"); do { if (alarm->time.tm_mon < 11) { alarm->time.tm_mon++; } else { alarm->time.tm_mon = 0; alarm->time.tm_year++; } days = rtc_month_days(alarm->time.tm_mon, alarm->time.tm_year); } while (days < alarm->time.tm_mday); break; /* Year rollover ... easy except for leap years! */ case year: dev_dbg(&rtc->dev, "alarm rollover: %s\n", "year"); do { alarm->time.tm_year++; } while (!is_leap_year(alarm->time.tm_year + 1900) && rtc_valid_tm(&alarm->time) != 0); break; default: dev_warn(&rtc->dev, "alarm rollover not handled\n"); } err = rtc_valid_tm(&alarm->time); done: if (err && alarm->enabled) dev_warn(&rtc->dev, "invalid alarm value: %ptR\n", &alarm->time); else rtc_add_offset(rtc, &alarm->time); return err; } int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { int err; err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; if (!rtc->ops) { err = -ENODEV; } else if (!test_bit(RTC_FEATURE_ALARM, rtc->features)) { err = -EINVAL; } else { memset(alarm, 0, sizeof(struct rtc_wkalrm)); alarm->enabled = rtc->aie_timer.enabled; alarm->time = rtc_ktime_to_tm(rtc->aie_timer.node.expires); } mutex_unlock(&rtc->ops_lock); trace_rtc_read_alarm(rtc_tm_to_time64(&alarm->time), err); return err; } EXPORT_SYMBOL_GPL(rtc_read_alarm); static int __rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { struct rtc_time tm; time64_t now, scheduled; int err; err = rtc_valid_tm(&alarm->time); if (err) return err; scheduled = rtc_tm_to_time64(&alarm->time); /* Make sure we're not setting alarms in the past */ err = __rtc_read_time(rtc, &tm); if (err) return err; now = rtc_tm_to_time64(&tm); if (scheduled <= now) return -ETIME; /* * XXX - We just checked to make sure the alarm time is not * in the past, but there is still a race window where if * the is alarm set for the next second and the second ticks * over right here, before we set the alarm. */ rtc_subtract_offset(rtc, &alarm->time); if (!rtc->ops) err = -ENODEV; else if (!test_bit(RTC_FEATURE_ALARM, rtc->features)) err = -EINVAL; else err = rtc->ops->set_alarm(rtc->dev.parent, alarm); /* * Check for potential race described above. If the waiting for next * second, and the second just ticked since the check above, either * * 1) It ticked after the alarm was set, and an alarm irq should be * generated. * * 2) It ticked before the alarm was set, and alarm irq most likely will * not be generated. * * While we cannot easily check for which of these two scenarios we * are in, we can return -ETIME to signal that the timer has already * expired, which is true in both cases. 
*/ if ((scheduled - now) <= 1) { err = __rtc_read_time(rtc, &tm); if (err) return err; now = rtc_tm_to_time64(&tm); if (scheduled <= now) return -ETIME; } trace_rtc_set_alarm(rtc_tm_to_time64(&alarm->time), err); return err; } int rtc_set_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { ktime_t alarm_time; int err; if (!rtc->ops) return -ENODEV; else if (!test_bit(RTC_FEATURE_ALARM, rtc->features)) return -EINVAL; err = rtc_valid_tm(&alarm->time); if (err != 0) return err; err = rtc_valid_range(rtc, &alarm->time); if (err) return err; err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; if (rtc->aie_timer.enabled) rtc_timer_remove(rtc, &rtc->aie_timer); alarm_time = rtc_tm_to_ktime(alarm->time); /* * Round down so we never miss a deadline, checking for past deadline is * done in __rtc_set_alarm */ if (test_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->features)) alarm_time = ktime_sub_ns(alarm_time, (u64)alarm->time.tm_sec * NSEC_PER_SEC); rtc->aie_timer.node.expires = alarm_time; rtc->aie_timer.period = 0; if (alarm->enabled) err = rtc_timer_enqueue(rtc, &rtc->aie_timer); mutex_unlock(&rtc->ops_lock); return err; } EXPORT_SYMBOL_GPL(rtc_set_alarm); /* Called once per device from rtc_device_register */ int rtc_initialize_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm) { int err; struct rtc_time now; err = rtc_valid_tm(&alarm->time); if (err != 0) return err; err = rtc_read_time(rtc, &now); if (err) return err; err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; rtc->aie_timer.node.expires = rtc_tm_to_ktime(alarm->time); rtc->aie_timer.period = 0; /* Alarm has to be enabled & in the future for us to enqueue it */ if (alarm->enabled && (rtc_tm_to_ktime(now) < rtc->aie_timer.node.expires)) { rtc->aie_timer.enabled = 1; timerqueue_add(&rtc->timerqueue, &rtc->aie_timer.node); trace_rtc_timer_enqueue(&rtc->aie_timer); } mutex_unlock(&rtc->ops_lock); return err; } EXPORT_SYMBOL_GPL(rtc_initialize_alarm); int rtc_alarm_irq_enable(struct rtc_device *rtc, unsigned int enabled) { int err; err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; if (rtc->aie_timer.enabled != enabled) { if (enabled) err = rtc_timer_enqueue(rtc, &rtc->aie_timer); else rtc_timer_remove(rtc, &rtc->aie_timer); } if (err) /* nothing */; else if (!rtc->ops) err = -ENODEV; else if (!test_bit(RTC_FEATURE_ALARM, rtc->features) || !rtc->ops->alarm_irq_enable) err = -EINVAL; else err = rtc->ops->alarm_irq_enable(rtc->dev.parent, enabled); mutex_unlock(&rtc->ops_lock); trace_rtc_alarm_irq_enable(enabled, err); return err; } EXPORT_SYMBOL_GPL(rtc_alarm_irq_enable); int rtc_update_irq_enable(struct rtc_device *rtc, unsigned int enabled) { int err; err = mutex_lock_interruptible(&rtc->ops_lock); if (err) return err; #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL if (enabled == 0 && rtc->uie_irq_active) { mutex_unlock(&rtc->ops_lock); return rtc_dev_update_irq_enable_emul(rtc, 0); } #endif /* make sure we're changing state */ if (rtc->uie_rtctimer.enabled == enabled) goto out; if (!test_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features) || !test_bit(RTC_FEATURE_ALARM, rtc->features)) { mutex_unlock(&rtc->ops_lock); #ifdef CONFIG_RTC_INTF_DEV_UIE_EMUL return rtc_dev_update_irq_enable_emul(rtc, enabled); #else return -EINVAL; #endif } if (enabled) { struct rtc_time tm; ktime_t now, onesec; err = __rtc_read_time(rtc, &tm); if (err) goto out; onesec = ktime_set(1, 0); now = rtc_tm_to_ktime(tm); rtc->uie_rtctimer.node.expires = ktime_add(now, onesec); rtc->uie_rtctimer.period = ktime_set(1, 0); err 
= rtc_timer_enqueue(rtc, &rtc->uie_rtctimer); if (!err && rtc->ops && rtc->ops->alarm_irq_enable) err = rtc->ops->alarm_irq_enable(rtc->dev.parent, 1); if (err) goto out; } else { rtc_timer_remove(rtc, &rtc->uie_rtctimer); } out: mutex_unlock(&rtc->ops_lock); return err; } EXPORT_SYMBOL_GPL(rtc_update_irq_enable); /** * rtc_handle_legacy_irq - AIE, UIE and PIE event hook * @rtc: pointer to the rtc device * @num: number of occurrences of the event * @mode: type of the event, RTC_AF, RTC_UF or RTC_PF * * This function is called when an AIE, UIE or PIE mode interrupt * has occurred (or been emulated). * */ void rtc_handle_legacy_irq(struct rtc_device *rtc, int num, int mode) { unsigned long flags; /* mark one irq of the appropriate mode */ spin_lock_irqsave(&rtc->irq_lock, flags); rtc->irq_data = (rtc->irq_data + (num << 8)) | (RTC_IRQF | mode); spin_unlock_irqrestore(&rtc->irq_lock, flags); wake_up_interruptible(&rtc->irq_queue); kill_fasync(&rtc->async_queue, SIGIO, POLL_IN); } /** * rtc_aie_update_irq - AIE mode rtctimer hook * @rtc: pointer to the rtc_device * * This function is called when the aie_timer expires. */ void rtc_aie_update_irq(struct rtc_device *rtc) { rtc_handle_legacy_irq(rtc, 1, RTC_AF); } /** * rtc_uie_update_irq - UIE mode rtctimer hook * @rtc: pointer to the rtc_device * * This function is called when the uie_timer expires. */ void rtc_uie_update_irq(struct rtc_device *rtc) { rtc_handle_legacy_irq(rtc, 1, RTC_UF); } /** * rtc_pie_update_irq - PIE mode hrtimer hook * @timer: pointer to the pie mode hrtimer * * This function is used to emulate PIE mode interrupts * using an hrtimer. This function is called when the periodic * hrtimer expires. */ enum hrtimer_restart rtc_pie_update_irq(struct hrtimer *timer) { struct rtc_device *rtc; ktime_t period; u64 count; rtc = container_of(timer, struct rtc_device, pie_timer); period = NSEC_PER_SEC / rtc->irq_freq; count = hrtimer_forward_now(timer, period); rtc_handle_legacy_irq(rtc, count, RTC_PF); return HRTIMER_RESTART; } /** * rtc_update_irq - Triggered when an RTC interrupt occurs. * @rtc: the rtc device * @num: how many irqs are being reported (usually one) * @events: mask of RTC_IRQF with one or more of RTC_PF, RTC_AF, RTC_UF * Context: any */ void rtc_update_irq(struct rtc_device *rtc, unsigned long num, unsigned long events) { if (IS_ERR_OR_NULL(rtc)) return; pm_stay_awake(rtc->dev.parent); schedule_work(&rtc->irqwork); } EXPORT_SYMBOL_GPL(rtc_update_irq); struct rtc_device *rtc_class_open(const char *name) { struct device *dev; struct rtc_device *rtc = NULL; dev = class_find_device_by_name(&rtc_class, name); if (dev) rtc = to_rtc_device(dev); if (rtc) { if (!try_module_get(rtc->owner)) { put_device(dev); rtc = NULL; } } return rtc; } EXPORT_SYMBOL_GPL(rtc_class_open); void rtc_class_close(struct rtc_device *rtc) { module_put(rtc->owner); put_device(&rtc->dev); } EXPORT_SYMBOL_GPL(rtc_class_close); static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled) { /* * We always cancel the timer here first, because otherwise * we could run into BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); * when we manage to start the timer before the callback * returns HRTIMER_RESTART. * * We cannot use hrtimer_cancel() here as a running callback * could be blocked on rtc->irq_task_lock and hrtimer_cancel() * would spin forever.
*/ if (hrtimer_try_to_cancel(&rtc->pie_timer) < 0) return -1; if (enabled) { ktime_t period = NSEC_PER_SEC / rtc->irq_freq; hrtimer_start(&rtc->pie_timer, period, HRTIMER_MODE_REL); } return 0; } /** * rtc_irq_set_state - enable/disable 2^N Hz periodic IRQs * @rtc: the rtc device * @enabled: true to enable periodic IRQs * Context: any * * Note that rtc_irq_set_freq() should previously have been used to * specify the desired frequency of periodic IRQ. */ int rtc_irq_set_state(struct rtc_device *rtc, int enabled) { int err = 0; while (rtc_update_hrtimer(rtc, enabled) < 0) cpu_relax(); rtc->pie_enabled = enabled; trace_rtc_irq_set_state(enabled, err); return err; } /** * rtc_irq_set_freq - set 2^N Hz periodic IRQ frequency for IRQ * @rtc: the rtc device * @freq: positive frequency * Context: any * * Note that rtc_irq_set_state() is used to enable or disable the * periodic IRQs. */ int rtc_irq_set_freq(struct rtc_device *rtc, int freq) { int err = 0; if (freq <= 0 || freq > RTC_MAX_FREQ) return -EINVAL; rtc->irq_freq = freq; while (rtc->pie_enabled && rtc_update_hrtimer(rtc, 1) < 0) cpu_relax(); trace_rtc_irq_set_freq(freq, err); return err; } /** * rtc_timer_enqueue - Adds a rtc_timer to the rtc_device timerqueue * @rtc: rtc device * @timer: timer being added. * * Enqueues a timer onto the rtc device's timerqueue and sets * the next alarm event appropriately. * * Sets the enabled bit on the added timer. * * Must hold ops_lock for proper serialization of timerqueue */ static int rtc_timer_enqueue(struct rtc_device *rtc, struct rtc_timer *timer) { struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue); struct rtc_time tm; ktime_t now; int err; err = __rtc_read_time(rtc, &tm); if (err) return err; timer->enabled = 1; now = rtc_tm_to_ktime(tm); /* Skip over expired timers */ while (next) { if (next->expires >= now) break; next = timerqueue_iterate_next(next); } timerqueue_add(&rtc->timerqueue, &timer->node); trace_rtc_timer_enqueue(timer); if (!next || ktime_before(timer->node.expires, next->expires)) { struct rtc_wkalrm alarm; alarm.time = rtc_ktime_to_tm(timer->node.expires); alarm.enabled = 1; err = __rtc_set_alarm(rtc, &alarm); if (err == -ETIME) { pm_stay_awake(rtc->dev.parent); schedule_work(&rtc->irqwork); } else if (err) { timerqueue_del(&rtc->timerqueue, &timer->node); trace_rtc_timer_dequeue(timer); timer->enabled = 0; return err; } } return 0; } static void rtc_alarm_disable(struct rtc_device *rtc) { if (!rtc->ops || !test_bit(RTC_FEATURE_ALARM, rtc->features) || !rtc->ops->alarm_irq_enable) return; rtc->ops->alarm_irq_enable(rtc->dev.parent, false); trace_rtc_alarm_irq_enable(0, 0); } /** * rtc_timer_remove - Removes a rtc_timer from the rtc_device timerqueue * @rtc: rtc device * @timer: timer being removed. * * Removes a timer from the rtc device's timerqueue and sets * the next alarm event appropriately. * * Clears the enabled bit on the removed timer.
* * Must hold ops_lock for proper serialization of timerqueue */ static void rtc_timer_remove(struct rtc_device *rtc, struct rtc_timer *timer) { struct timerqueue_node *next = timerqueue_getnext(&rtc->timerqueue); timerqueue_del(&rtc->timerqueue, &timer->node); trace_rtc_timer_dequeue(timer); timer->enabled = 0; if (next == &timer->node) { struct rtc_wkalrm alarm; int err; next = timerqueue_getnext(&rtc->timerqueue); if (!next) { rtc_alarm_disable(rtc); return; } alarm.time = rtc_ktime_to_tm(next->expires); alarm.enabled = 1; err = __rtc_set_alarm(rtc, &alarm); if (err == -ETIME) { pm_stay_awake(rtc->dev.parent); schedule_work(&rtc->irqwork); } } } /** * rtc_timer_do_work - Expires rtc timers * @work: work item * * Expires rtc timers. Reprograms next alarm event if needed. * Called via worktask. * * Serializes access to timerqueue via ops_lock mutex */ void rtc_timer_do_work(struct work_struct *work) { struct rtc_timer *timer; struct timerqueue_node *next; ktime_t now; struct rtc_time tm; int err; struct rtc_device *rtc = container_of(work, struct rtc_device, irqwork); mutex_lock(&rtc->ops_lock); again: err = __rtc_read_time(rtc, &tm); if (err) { mutex_unlock(&rtc->ops_lock); return; } now = rtc_tm_to_ktime(tm); while ((next = timerqueue_getnext(&rtc->timerqueue))) { if (next->expires > now) break; /* expire timer */ timer = container_of(next, struct rtc_timer, node); timerqueue_del(&rtc->timerqueue, &timer->node); trace_rtc_timer_dequeue(timer); timer->enabled = 0; if (timer->func) timer->func(timer->rtc); trace_rtc_timer_fired(timer); /* Re-add/fwd periodic timers */ if (ktime_to_ns(timer->period)) { timer->node.expires = ktime_add(timer->node.expires, timer->period); timer->enabled = 1; timerqueue_add(&rtc->timerqueue, &timer->node); trace_rtc_timer_enqueue(timer); } } /* Set next alarm */ if (next) { struct rtc_wkalrm alarm; int err; int retry = 3; alarm.time = rtc_ktime_to_tm(next->expires); alarm.enabled = 1; reprogram: err = __rtc_set_alarm(rtc, &alarm); if (err == -ETIME) { goto again; } else if (err) { if (retry-- > 0) goto reprogram; timer = container_of(next, struct rtc_timer, node); timerqueue_del(&rtc->timerqueue, &timer->node); trace_rtc_timer_dequeue(timer); timer->enabled = 0; dev_err(&rtc->dev, "__rtc_set_alarm: err=%d\n", err); goto again; } } else { rtc_alarm_disable(rtc); } pm_relax(rtc->dev.parent); mutex_unlock(&rtc->ops_lock); } /* rtc_timer_init - Initializes an rtc_timer * @timer: timer to be initialized * @f: function pointer to be called when timer fires * @rtc: pointer to the rtc_device * * Kernel interface to initializing an rtc_timer.
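* * A minimal usage sketch (illustrative only; the names are not from this * file): rtc_timer_init(&my_timer, my_expiry_fn, rtc); followed by * rtc_timer_start(rtc, &my_timer, expires, 0); for a one-shot timer with * no recurrence period.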
*/ void rtc_timer_init(struct rtc_timer *timer, void (*f)(struct rtc_device *r), struct rtc_device *rtc) { timerqueue_init(&timer->node); timer->enabled = 0; timer->func = f; timer->rtc = rtc; } /* rtc_timer_start - Sets an rtc_timer to fire in the future * @ rtc: rtc device to be used * @ timer: timer being set * @ expires: time at which to expire the timer * @ period: period at which the timer will recur * * Kernel interface to set an rtc_timer */ int rtc_timer_start(struct rtc_device *rtc, struct rtc_timer *timer, ktime_t expires, ktime_t period) { int ret = 0; mutex_lock(&rtc->ops_lock); if (timer->enabled) rtc_timer_remove(rtc, timer); timer->node.expires = expires; timer->period = period; ret = rtc_timer_enqueue(rtc, timer); mutex_unlock(&rtc->ops_lock); return ret; } /* rtc_timer_cancel - Stops an rtc_timer * @ rtc: rtc device to be used * @ timer: timer being set * * Kernel interface to cancel an rtc_timer */ void rtc_timer_cancel(struct rtc_device *rtc, struct rtc_timer *timer) { mutex_lock(&rtc->ops_lock); if (timer->enabled) rtc_timer_remove(rtc, timer); mutex_unlock(&rtc->ops_lock); } /** * rtc_read_offset - Read the amount of rtc offset in parts per billion * @rtc: rtc device to be used * @offset: the offset in parts per billion * * See below for details. * * Kernel interface to read rtc clock offset * Returns 0 on success, or a negative number on error. * If read_offset() is not implemented for the rtc, return -EINVAL */ int rtc_read_offset(struct rtc_device *rtc, long *offset) { int ret; if (!rtc->ops) return -ENODEV; if (!rtc->ops->read_offset) return -EINVAL; mutex_lock(&rtc->ops_lock); ret = rtc->ops->read_offset(rtc->dev.parent, offset); mutex_unlock(&rtc->ops_lock); trace_rtc_read_offset(*offset, ret); return ret; } /** * rtc_set_offset - Adjusts the duration of the average second * @rtc: rtc device to be used * @offset: the offset in parts per billion * * Some RTCs allow an adjustment to the average duration of a second * to compensate for differences in the actual clock rate due to temperature, * the crystal, capacitor, etc. * * The adjustment applied is as follows: * t = t0 * (1 + offset * 1e-9) * where t0 is the measured length of 1 RTC second with offset = 0 * * Kernel interface to adjust an rtc clock offset. * Return 0 on success, or a negative number on error. * If the rtc offset is not settable (or not implemented), return -EINVAL */ int rtc_set_offset(struct rtc_device *rtc, long offset) { int ret; if (!rtc->ops) return -ENODEV; if (!rtc->ops->set_offset) return -EINVAL; mutex_lock(&rtc->ops_lock); ret = rtc->ops->set_offset(rtc->dev.parent, offset); mutex_unlock(&rtc->ops_lock); trace_rtc_set_offset(offset, ret); return ret; }
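The offset convention documented above, t = t0 * (1 + offset * 1e-9), maps directly onto a drift measurement. The sketch below is illustrative only and is not part of this file; the helper name and the 64-bit arithmetic are assumptions, shown merely to make the parts-per-billion unit concrete.

/* Illustrative sketch (not from this file): convert a measured drift into the
 * parts-per-billion value expected by rtc_set_offset().  A clock that gains
 * time (drift_sec > 0) needs a negative offset so that the average second is
 * lengthened, per t = t0 * (1 + offset * 1e-9).
 */
static long long rtc_drift_to_ppb(long long drift_sec, long long interval_sec)
{
	if (interval_sec <= 0)
		return 0;
	return -(drift_sec * 1000000000LL) / interval_sec;
}
/* Example: an RTC gaining 5 s per day (86400 s) needs roughly -57870 ppb. */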
// SPDX-License-Identifier: GPL-2.0-only /* * v4l2-event.c * * V4L2 events. * * Copyright (C) 2009--2010 Nokia Corporation. * * Contact: Sakari Ailus <sakari.ailus@iki.fi> */ #include <media/v4l2-dev.h> #include <media/v4l2-fh.h> #include <media/v4l2-event.h> #include <linux/mm.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/export.h> static unsigned int sev_pos(const struct v4l2_subscribed_event *sev, unsigned int idx) { idx += sev->first; return idx >= sev->elems ? idx - sev->elems : idx; } static int __v4l2_event_dequeue(struct v4l2_fh *fh, struct v4l2_event *event) { struct v4l2_kevent *kev; struct timespec64 ts; unsigned long flags; spin_lock_irqsave(&fh->vdev->fh_lock, flags); if (list_empty(&fh->available)) { spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); return -ENOENT; } WARN_ON(fh->navailable == 0); kev = list_first_entry(&fh->available, struct v4l2_kevent, list); list_del(&kev->list); fh->navailable--; kev->event.pending = fh->navailable; *event = kev->event; ts = ns_to_timespec64(kev->ts); event->timestamp.tv_sec = ts.tv_sec; event->timestamp.tv_nsec = ts.tv_nsec; kev->sev->first = sev_pos(kev->sev, 1); kev->sev->in_use--; spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); return 0; } int v4l2_event_dequeue(struct v4l2_fh *fh, struct v4l2_event *event, int nonblocking) { int ret; if (nonblocking) return __v4l2_event_dequeue(fh, event); /* Release the vdev lock while waiting */ if (fh->vdev->lock) mutex_unlock(fh->vdev->lock); do { ret = wait_event_interruptible(fh->wait, fh->navailable != 0); if (ret < 0) break; ret = __v4l2_event_dequeue(fh, event); } while (ret == -ENOENT); if (fh->vdev->lock) mutex_lock(fh->vdev->lock); return ret; } EXPORT_SYMBOL_GPL(v4l2_event_dequeue); /* Caller must hold fh->vdev->fh_lock!
*/ static struct v4l2_subscribed_event *v4l2_event_subscribed( struct v4l2_fh *fh, u32 type, u32 id) { struct v4l2_subscribed_event *sev; assert_spin_locked(&fh->vdev->fh_lock); list_for_each_entry(sev, &fh->subscribed, list) if (sev->type == type && sev->id == id) return sev; return NULL; } static void __v4l2_event_queue_fh(struct v4l2_fh *fh, const struct v4l2_event *ev, u64 ts) { struct v4l2_subscribed_event *sev; struct v4l2_kevent *kev; bool copy_payload = true; /* Are we subscribed? */ sev = v4l2_event_subscribed(fh, ev->type, ev->id); if (sev == NULL) return; /* Increase event sequence number on fh. */ fh->sequence++; /* Do we have any free events? */ if (sev->in_use == sev->elems) { /* no, remove the oldest one */ kev = sev->events + sev_pos(sev, 0); list_del(&kev->list); sev->in_use--; sev->first = sev_pos(sev, 1); fh->navailable--; if (sev->elems == 1) { if (sev->ops && sev->ops->replace) { sev->ops->replace(&kev->event, ev); copy_payload = false; } } else if (sev->ops && sev->ops->merge) { struct v4l2_kevent *second_oldest = sev->events + sev_pos(sev, 0); sev->ops->merge(&kev->event, &second_oldest->event); } } /* Take one and fill it. */ kev = sev->events + sev_pos(sev, sev->in_use); kev->event.type = ev->type; if (copy_payload) kev->event.u = ev->u; kev->event.id = ev->id; kev->ts = ts; kev->event.sequence = fh->sequence; sev->in_use++; list_add_tail(&kev->list, &fh->available); fh->navailable++; wake_up_all(&fh->wait); } void v4l2_event_queue(struct video_device *vdev, const struct v4l2_event *ev) { struct v4l2_fh *fh; unsigned long flags; u64 ts; if (vdev == NULL) return; ts = ktime_get_ns(); spin_lock_irqsave(&vdev->fh_lock, flags); list_for_each_entry(fh, &vdev->fh_list, list) __v4l2_event_queue_fh(fh, ev, ts); spin_unlock_irqrestore(&vdev->fh_lock, flags); } EXPORT_SYMBOL_GPL(v4l2_event_queue); void v4l2_event_queue_fh(struct v4l2_fh *fh, const struct v4l2_event *ev) { unsigned long flags; u64 ts = ktime_get_ns(); spin_lock_irqsave(&fh->vdev->fh_lock, flags); __v4l2_event_queue_fh(fh, ev, ts); spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); } EXPORT_SYMBOL_GPL(v4l2_event_queue_fh); int v4l2_event_pending(struct v4l2_fh *fh) { return fh->navailable; } EXPORT_SYMBOL_GPL(v4l2_event_pending); void v4l2_event_wake_all(struct video_device *vdev) { struct v4l2_fh *fh; unsigned long flags; if (!vdev) return; spin_lock_irqsave(&vdev->fh_lock, flags); list_for_each_entry(fh, &vdev->fh_list, list) wake_up_all(&fh->wait); spin_unlock_irqrestore(&vdev->fh_lock, flags); } EXPORT_SYMBOL_GPL(v4l2_event_wake_all); static void __v4l2_event_unsubscribe(struct v4l2_subscribed_event *sev) { struct v4l2_fh *fh = sev->fh; unsigned int i; lockdep_assert_held(&fh->subscribe_lock); assert_spin_locked(&fh->vdev->fh_lock); /* Remove any pending events for this subscription */ for (i = 0; i < sev->in_use; i++) { list_del(&sev->events[sev_pos(sev, i)].list); fh->navailable--; } list_del(&sev->list); } int v4l2_event_subscribe(struct v4l2_fh *fh, const struct v4l2_event_subscription *sub, unsigned int elems, const struct v4l2_subscribed_event_ops *ops) { struct v4l2_subscribed_event *sev, *found_ev; unsigned long flags; unsigned int i; int ret = 0; if (sub->type == V4L2_EVENT_ALL) return -EINVAL; if (elems < 1) elems = 1; sev = kvzalloc(struct_size(sev, events, elems), GFP_KERNEL); if (!sev) return -ENOMEM; sev->elems = elems; for (i = 0; i < elems; i++) sev->events[i].sev = sev; sev->type = sub->type; sev->id = sub->id; sev->flags = sub->flags; sev->fh = fh; sev->ops = ops; 
mutex_lock(&fh->subscribe_lock); spin_lock_irqsave(&fh->vdev->fh_lock, flags); found_ev = v4l2_event_subscribed(fh, sub->type, sub->id); if (!found_ev) list_add(&sev->list, &fh->subscribed); spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); if (found_ev) { /* Already listening */ kvfree(sev); } else if (sev->ops && sev->ops->add) { ret = sev->ops->add(sev, elems); if (ret) { spin_lock_irqsave(&fh->vdev->fh_lock, flags); __v4l2_event_unsubscribe(sev); spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); kvfree(sev); } } mutex_unlock(&fh->subscribe_lock); return ret; } EXPORT_SYMBOL_GPL(v4l2_event_subscribe); void v4l2_event_unsubscribe_all(struct v4l2_fh *fh) { struct v4l2_event_subscription sub; struct v4l2_subscribed_event *sev; unsigned long flags; do { sev = NULL; spin_lock_irqsave(&fh->vdev->fh_lock, flags); if (!list_empty(&fh->subscribed)) { sev = list_first_entry(&fh->subscribed, struct v4l2_subscribed_event, list); sub.type = sev->type; sub.id = sev->id; } spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); if (sev) v4l2_event_unsubscribe(fh, &sub); } while (sev); } EXPORT_SYMBOL_GPL(v4l2_event_unsubscribe_all); int v4l2_event_unsubscribe(struct v4l2_fh *fh, const struct v4l2_event_subscription *sub) { struct v4l2_subscribed_event *sev; unsigned long flags; if (sub->type == V4L2_EVENT_ALL) { v4l2_event_unsubscribe_all(fh); return 0; } mutex_lock(&fh->subscribe_lock); spin_lock_irqsave(&fh->vdev->fh_lock, flags); sev = v4l2_event_subscribed(fh, sub->type, sub->id); if (sev != NULL) __v4l2_event_unsubscribe(sev); spin_unlock_irqrestore(&fh->vdev->fh_lock, flags); if (sev && sev->ops && sev->ops->del) sev->ops->del(sev); mutex_unlock(&fh->subscribe_lock); kvfree(sev); return 0; } EXPORT_SYMBOL_GPL(v4l2_event_unsubscribe); int v4l2_event_subdev_unsubscribe(struct v4l2_subdev *sd, struct v4l2_fh *fh, struct v4l2_event_subscription *sub) { return v4l2_event_unsubscribe(fh, sub); } EXPORT_SYMBOL_GPL(v4l2_event_subdev_unsubscribe); static void v4l2_event_src_replace(struct v4l2_event *old, const struct v4l2_event *new) { u32 old_changes = old->u.src_change.changes; old->u.src_change = new->u.src_change; old->u.src_change.changes |= old_changes; } static void v4l2_event_src_merge(const struct v4l2_event *old, struct v4l2_event *new) { new->u.src_change.changes |= old->u.src_change.changes; } static const struct v4l2_subscribed_event_ops v4l2_event_src_ch_ops = { .replace = v4l2_event_src_replace, .merge = v4l2_event_src_merge, }; int v4l2_src_change_event_subscribe(struct v4l2_fh *fh, const struct v4l2_event_subscription *sub) { if (sub->type == V4L2_EVENT_SOURCE_CHANGE) return v4l2_event_subscribe(fh, sub, 0, &v4l2_event_src_ch_ops); return -EINVAL; } EXPORT_SYMBOL_GPL(v4l2_src_change_event_subscribe); int v4l2_src_change_event_subdev_subscribe(struct v4l2_subdev *sd, struct v4l2_fh *fh, struct v4l2_event_subscription *sub) { return v4l2_src_change_event_subscribe(fh, sub); } EXPORT_SYMBOL_GPL(v4l2_src_change_event_subdev_subscribe);
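As a usage illustration only (the function below is hypothetical and not part of v4l2-event.c), a driver that detects a new input resolution would post a source-change event through v4l2_event_queue(); the replace/merge ops installed by v4l2_src_change_event_subscribe() then collapse duplicate events while they wait in the per-fh ring managed above.

/* Illustrative sketch: post a source-change event that the subscription
 * machinery above delivers to every subscribed file handle.
 */
static void example_signal_resolution_change(struct video_device *vdev)
{
	static const struct v4l2_event ev = {
		.type = V4L2_EVENT_SOURCE_CHANGE,
		.u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION,
	};

	v4l2_event_queue(vdev, &ev);
}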
// SPDX-License-Identifier: GPL-2.0-or-later /* RxRPC individual remote procedure call handling * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/slab.h> #include <linux/module.h> #include <linux/circ_buf.h> #include <linux/spinlock_types.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include "ar-internal.h" const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_UNINITIALISED] = "Uninit ", [RXRPC_CALL_CLIENT_AWAIT_CONN] = "ClWtConn", [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq", [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq", [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", [RXRPC_CALL_SERVER_AWAIT_ACK] = "SvAwtACK", [RXRPC_CALL_COMPLETE] = "Complete", }; const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = { [RXRPC_CALL_SUCCEEDED] = "Complete", [RXRPC_CALL_REMOTELY_ABORTED] = "RmtAbort", [RXRPC_CALL_LOCALLY_ABORTED] = "LocAbort", [RXRPC_CALL_LOCAL_ERROR] = "LocError", [RXRPC_CALL_NETWORK_ERROR] = "NetError", }; struct kmem_cache *rxrpc_call_jar; static DEFINE_SEMAPHORE(rxrpc_call_limiter, 1000); static DEFINE_SEMAPHORE(rxrpc_kernel_call_limiter, 1000); void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what) { struct rxrpc_local *local = call->local; bool busy; if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) { spin_lock_irq(&local->lock); busy = !list_empty(&call->attend_link); trace_rxrpc_poke_call(call, busy, what); if (!busy && !rxrpc_try_get_call(call, rxrpc_call_get_poke)) busy = true; if (!busy) { list_add_tail(&call->attend_link, &local->call_attend_q); } spin_unlock_irq(&local->lock); if (!busy) rxrpc_wake_up_io_thread(local); } } static void rxrpc_call_timer_expired(struct timer_list *t) { struct rxrpc_call *call = timer_container_of(call, t, timer); _enter("%d", call->debug_id); if (!__rxrpc_call_is_complete(call)) { trace_rxrpc_timer_expired(call); rxrpc_poke_call(call, rxrpc_call_poke_timer); } } static struct lock_class_key rxrpc_call_user_mutex_lock_class_key; static void rxrpc_destroy_call(struct work_struct *); /* * find an extant server call * - called in process context with IRQs enabled */ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx, unsigned long user_call_ID) { struct rxrpc_call *call; struct rb_node *p; _enter("%p,%lx", rx, user_call_ID); read_lock(&rx->call_lock); p = rx->calls.rb_node; while (p) { call = rb_entry(p, struct rxrpc_call, sock_node); if (user_call_ID < call->user_call_ID) p = p->rb_left; else if (user_call_ID > call->user_call_ID) p = p->rb_right; else goto found_extant_call; } read_unlock(&rx->call_lock); _leave(" = NULL"); return NULL; found_extant_call: rxrpc_get_call(call, rxrpc_call_get_sendmsg); read_unlock(&rx->call_lock); _leave(" = %p [%d]", call, refcount_read(&call->ref)); return call; } /* * allocate a new call */ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, unsigned int debug_id) { struct rxrpc_call *call; struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); call = kmem_cache_zalloc(rxrpc_call_jar, gfp); if (!call) return NULL; mutex_init(&call->user_mutex); /* Prevent lockdep reporting a deadlock false positive between the afs * filesystem and sys_sendmsg() via the mmap sem. 
*/ if (rx->sk.sk_kern_sock) lockdep_set_class(&call->user_mutex, &rxrpc_call_user_mutex_lock_class_key); timer_setup(&call->timer, rxrpc_call_timer_expired, 0); INIT_WORK(&call->destroyer, rxrpc_destroy_call); INIT_LIST_HEAD(&call->link); INIT_LIST_HEAD(&call->wait_link); INIT_LIST_HEAD(&call->accept_link); INIT_LIST_HEAD(&call->recvmsg_link); INIT_LIST_HEAD(&call->sock_link); INIT_LIST_HEAD(&call->attend_link); skb_queue_head_init(&call->recvmsg_queue); skb_queue_head_init(&call->rx_queue); skb_queue_head_init(&call->rx_oos_queue); init_waitqueue_head(&call->waitq); spin_lock_init(&call->notify_lock); refcount_set(&call->ref, 1); call->debug_id = debug_id; call->tx_total_len = -1; call->tx_jumbo_max = 1; call->next_rx_timo = 20 * HZ; call->next_req_timo = 1 * HZ; call->ackr_window = 1; call->ackr_wtop = 1; call->delay_ack_at = KTIME_MAX; call->rack_timo_at = KTIME_MAX; call->ping_at = KTIME_MAX; call->keepalive_at = KTIME_MAX; call->expect_rx_by = KTIME_MAX; call->expect_req_by = KTIME_MAX; call->expect_term_by = KTIME_MAX; memset(&call->sock_node, 0xed, sizeof(call->sock_node)); call->rx_winsize = rxrpc_rx_window_size; call->tx_winsize = 16; call->cong_cwnd = RXRPC_MIN_CWND; call->cong_ssthresh = RXRPC_TX_MAX_WINDOW; rxrpc_call_init_rtt(call); call->rxnet = rxnet; call->rtt_avail = RXRPC_CALL_RTT_AVAIL_MASK; atomic_inc(&rxnet->nr_calls); return call; } /* * Allocate a new client call. */ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, struct rxrpc_conn_parameters *cp, struct rxrpc_call_params *p, gfp_t gfp, unsigned int debug_id) { struct rxrpc_call *call; ktime_t now; int ret; _enter(""); call = rxrpc_alloc_call(rx, gfp, debug_id); if (!call) return ERR_PTR(-ENOMEM); now = ktime_get_real(); call->acks_latest_ts = now; call->cong_tstamp = now; call->dest_srx = cp->peer->srx; call->dest_srx.srx_service = cp->service_id; call->interruptibility = p->interruptibility; call->tx_total_len = p->tx_total_len; call->key = key_get(cp->key); call->peer = rxrpc_get_peer(cp->peer, rxrpc_peer_get_call); call->local = rxrpc_get_local(cp->local, rxrpc_local_get_call); call->security_level = cp->security_level; if (p->kernel) __set_bit(RXRPC_CALL_KERNEL, &call->flags); if (cp->upgrade) __set_bit(RXRPC_CALL_UPGRADE, &call->flags); if (cp->exclusive) __set_bit(RXRPC_CALL_EXCLUSIVE, &call->flags); if (p->timeouts.normal) call->next_rx_timo = umin(p->timeouts.normal, 1); if (p->timeouts.idle) call->next_req_timo = umin(p->timeouts.idle, 1); if (p->timeouts.hard) call->hard_timo = p->timeouts.hard; ret = rxrpc_init_client_call_security(call); if (ret < 0) { rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, ret); rxrpc_put_call(call, rxrpc_call_put_discard_error); return ERR_PTR(ret); } rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_CONN); trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), p->user_call_ID, rxrpc_call_new_client); _leave(" = %p", call); return call; } /* * Initiate the call ack/resend/expiry timer. */ void rxrpc_start_call_timer(struct rxrpc_call *call) { if (call->hard_timo) { ktime_t delay = ms_to_ktime(call->hard_timo * 1000); call->expect_term_by = ktime_add(ktime_get_real(), delay); trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_hard); } call->timer.expires = jiffies; } /* * Wait for a call slot to become available. 
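* * Two independent limiters are used here: calls originated inside the kernel * (p->kernel) are counted against rxrpc_kernel_call_limiter, while userspace * calls are counted against rxrpc_call_limiter; each semaphore is initialised * above to allow up to 1000 concurrent calls.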
*/ static struct semaphore *rxrpc_get_call_slot(struct rxrpc_call_params *p, gfp_t gfp) { struct semaphore *limiter = &rxrpc_call_limiter; if (p->kernel) limiter = &rxrpc_kernel_call_limiter; if (p->interruptibility == RXRPC_UNINTERRUPTIBLE) { down(limiter); return limiter; } return down_interruptible(limiter) < 0 ? NULL : limiter; } /* * Release a call slot. */ static void rxrpc_put_call_slot(struct rxrpc_call *call) { struct semaphore *limiter = &rxrpc_call_limiter; if (test_bit(RXRPC_CALL_KERNEL, &call->flags)) limiter = &rxrpc_kernel_call_limiter; up(limiter); } /* * Start the process of connecting a call. We obtain a peer and a connection * bundle, but the actual association of a call with a connection is offloaded * to the I/O thread to simplify locking. */ static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp) { struct rxrpc_local *local = call->local; int ret = -ENOMEM; _enter("{%d,%lx},", call->debug_id, call->user_call_ID); ret = rxrpc_look_up_bundle(call, gfp); if (ret < 0) goto error; trace_rxrpc_client(NULL, -1, rxrpc_client_queue_new_call); rxrpc_get_call(call, rxrpc_call_get_io_thread); spin_lock_irq(&local->client_call_lock); list_add_tail(&call->wait_link, &local->new_client_calls); spin_unlock_irq(&local->client_call_lock); rxrpc_wake_up_io_thread(local); return 0; error: __set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); return ret; } /* * Set up a call for the given parameters. * - Called with the socket lock held, which it must release. * - If it returns a call, the call's lock will need releasing by the caller. */ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, struct rxrpc_conn_parameters *cp, struct rxrpc_call_params *p, gfp_t gfp, unsigned int debug_id) __releases(&rx->sk.sk_lock) __acquires(&call->user_mutex) { struct rxrpc_call *call, *xcall; struct rxrpc_net *rxnet; struct semaphore *limiter; struct rb_node *parent, **pp; int ret; _enter("%p,%lx", rx, p->user_call_ID); if (WARN_ON_ONCE(!cp->peer)) { release_sock(&rx->sk); return ERR_PTR(-EIO); } limiter = rxrpc_get_call_slot(p, gfp); if (!limiter) { release_sock(&rx->sk); return ERR_PTR(-ERESTARTSYS); } call = rxrpc_alloc_client_call(rx, cp, p, gfp, debug_id); if (IS_ERR(call)) { release_sock(&rx->sk); up(limiter); _leave(" = %ld", PTR_ERR(call)); return call; } /* We need to protect a partially set up call against the user as we * will be acting outside the socket lock. */ mutex_lock(&call->user_mutex); /* Publish the call, even though it is incompletely set up as yet */ write_lock(&rx->call_lock); pp = &rx->calls.rb_node; parent = NULL; while (*pp) { parent = *pp; xcall = rb_entry(parent, struct rxrpc_call, sock_node); if (p->user_call_ID < xcall->user_call_ID) pp = &(*pp)->rb_left; else if (p->user_call_ID > xcall->user_call_ID) pp = &(*pp)->rb_right; else goto error_dup_user_ID; } rcu_assign_pointer(call->socket, rx); call->user_call_ID = p->user_call_ID; __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); rxrpc_get_call(call, rxrpc_call_get_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); list_add(&call->sock_link, &rx->sock_calls); write_unlock(&rx->call_lock); rxnet = call->rxnet; spin_lock(&rxnet->call_lock); list_add_tail_rcu(&call->link, &rxnet->calls); spin_unlock(&rxnet->call_lock); /* From this point on, the call is protected by its own lock. */ release_sock(&rx->sk); /* Set up or get a connection record and set the protocol parameters, * including channel number and call ID. 
*/ ret = rxrpc_connect_call(call, gfp); if (ret < 0) goto error_attached_to_socket; _leave(" = %p [new]", call); return call; /* We unexpectedly found the user ID in the list after taking * the call_lock. This shouldn't happen unless the user races * with itself and tries to add the same user ID twice at the * same time in different threads. */ error_dup_user_ID: write_unlock(&rx->call_lock); release_sock(&rx->sk); rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EEXIST); trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), 0, rxrpc_call_see_userid_exists); mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put_userid_exists); _leave(" = -EEXIST"); return ERR_PTR(-EEXIST); /* We got an error, but the call is attached to the socket and is in * need of release. However, we might now race with recvmsg() when the * call's completion notifies the socket. Return 0 from sys_sendmsg() and * leave the error to recvmsg() to deal with. */ error_attached_to_socket: trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), ret, rxrpc_call_see_connect_failed); rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); _leave(" = c=%08x [err]", call->debug_id); return call; } /* * Set up an incoming call. call->conn points to the connection. * This is called with interrupts disabled and isn't allowed to fail. */ void rxrpc_incoming_call(struct rxrpc_sock *rx, struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_connection *conn = call->conn; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); u32 chan; _enter(",%d", call->conn->debug_id); rcu_assign_pointer(call->socket, rx); call->call_id = sp->hdr.callNumber; call->dest_srx.srx_service = sp->hdr.serviceId; call->cid = sp->hdr.cid; call->cong_tstamp = skb->tstamp; __set_bit(RXRPC_CALL_EXPOSED, &call->flags); rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); spin_lock(&conn->state_lock); switch (conn->state) { case RXRPC_CONN_SERVICE_UNSECURED: case RXRPC_CONN_SERVICE_CHALLENGING: __set_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags); break; case RXRPC_CONN_SERVICE: break; case RXRPC_CONN_ABORTED: rxrpc_set_call_completion(call, conn->completion, conn->abort_code, conn->error); break; default: BUG(); } rxrpc_get_call(call, rxrpc_call_get_io_thread); /* Set the channel for this call. We don't get channel_lock as we're * only defending against the data_ready handler (which we're called * from) and the RESPONSE packet parser (which is only really * interested in call_counter and can cope with a disagreement with the * call pointer). */ chan = sp->hdr.cid & RXRPC_CHANNELMASK; conn->channels[chan].call_counter = call->call_id; conn->channels[chan].call_id = call->call_id; conn->channels[chan].call = call; spin_unlock(&conn->state_lock); spin_lock(&conn->peer->lock); hlist_add_head(&call->error_link, &conn->peer->error_targets); spin_unlock(&conn->peer->lock); rxrpc_start_call_timer(call); _leave(""); } /* * Note the re-emergence of a call. */ void rxrpc_see_call(struct rxrpc_call *call, enum rxrpc_call_trace why) { if (call) { int r = refcount_read(&call->ref); trace_rxrpc_call(call->debug_id, r, 0, why); } } struct rxrpc_call *rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace why) { int r; if (!call || !__refcount_inc_not_zero(&call->ref, &r)) return NULL; trace_rxrpc_call(call->debug_id, r + 1, 0, why); return call; } /* * Note the addition of a ref on a call.
*/ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace why) { int r; __refcount_inc(&call->ref, &r); trace_rxrpc_call(call->debug_id, r + 1, 0, why); } /* * Clean up the transmission buffers. */ static void rxrpc_cleanup_tx_buffers(struct rxrpc_call *call) { struct rxrpc_txqueue *tq, *next; for (tq = call->tx_queue; tq; tq = next) { next = tq->next; for (int i = 0; i < RXRPC_NR_TXQUEUE; i++) if (tq->bufs[i]) rxrpc_put_txbuf(tq->bufs[i], rxrpc_txbuf_put_cleaned); trace_rxrpc_tq(call, tq, 0, rxrpc_tq_cleaned); kfree(tq); } } /* * Clean up the receive buffers. */ static void rxrpc_cleanup_rx_buffers(struct rxrpc_call *call) { rxrpc_purge_queue(&call->recvmsg_queue); rxrpc_purge_queue(&call->rx_queue); rxrpc_purge_queue(&call->rx_oos_queue); } /* * Detach a call from its owning socket. */ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; bool putu = false; _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref)); trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), call->flags, rxrpc_call_see_release); if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); rxrpc_put_call_slot(call); /* Note that at this point, the call may still be on or may have been * added back on to the socket receive queue. recvmsg() must discard * released calls. The CALL_RELEASED flag should prevent further * notifications. */ spin_lock_irq(&rx->recvmsg_lock); spin_unlock_irq(&rx->recvmsg_lock); write_lock(&rx->call_lock); if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { rb_erase(&call->sock_node, &rx->calls); memset(&call->sock_node, 0xdd, sizeof(call->sock_node)); putu = true; } list_del(&call->sock_link); write_unlock(&rx->call_lock); _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); if (putu) rxrpc_put_call(call, rxrpc_call_put_userid); _leave(""); } /* * release all the calls associated with a socket */ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) { struct rxrpc_call *call; _enter("%p", rx); while (!list_empty(&rx->to_be_accepted)) { call = list_entry(rx->to_be_accepted.next, struct rxrpc_call, accept_link); list_del(&call->accept_link); rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET, rxrpc_abort_call_sock_release_tba); rxrpc_put_call(call, rxrpc_call_put_release_sock_tba); } while (!list_empty(&rx->sock_calls)) { call = list_entry(rx->sock_calls.next, struct rxrpc_call, sock_link); rxrpc_get_call(call, rxrpc_call_get_release_sock); rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET, rxrpc_abort_call_sock_release); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put_release_sock); } while ((call = list_first_entry_or_null(&rx->recvmsg_q, struct rxrpc_call, recvmsg_link))) { list_del_init(&call->recvmsg_link); rxrpc_put_call(call, rxrpc_call_put_release_recvmsg_q); } _leave(""); } /* * release a call */ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace why) { struct rxrpc_net *rxnet = call->rxnet; unsigned int debug_id = call->debug_id; bool dead; int r; ASSERT(call != NULL); dead = __refcount_dec_and_test(&call->ref, &r); trace_rxrpc_call(debug_id, r - 1, 0, why); if (dead) { ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE); if (!list_empty(&call->link)) { spin_lock(&rxnet->call_lock); list_del_init(&call->link); spin_unlock(&rxnet->call_lock); } rxrpc_cleanup_call(call); } } /* * Free up the call under RCU. 
*/ static void rxrpc_rcu_free_call(struct rcu_head *rcu) { struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); struct rxrpc_net *rxnet = READ_ONCE(call->rxnet); kmem_cache_free(rxrpc_call_jar, call); if (atomic_dec_and_test(&rxnet->nr_calls)) wake_up_var(&rxnet->nr_calls); } /* * Final call destruction - but must be done in process context. */ static void rxrpc_destroy_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, destroyer); timer_delete_sync(&call->timer); rxrpc_cleanup_tx_buffers(call); rxrpc_cleanup_rx_buffers(call); rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned); rxrpc_put_connection(call->conn, rxrpc_conn_put_call); rxrpc_deactivate_bundle(call->bundle); rxrpc_put_bundle(call->bundle, rxrpc_bundle_put_call); rxrpc_put_peer(call->peer, rxrpc_peer_put_call); rxrpc_put_local(call->local, rxrpc_local_put_call); call_rcu(&call->rcu, rxrpc_rcu_free_call); } /* * clean up a call */ void rxrpc_cleanup_call(struct rxrpc_call *call) { memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); timer_delete(&call->timer); if (rcu_read_lock_held()) /* Can't use the rxrpc workqueue as we need to cancel/flush * something that may be running/waiting there. */ schedule_work(&call->destroyer); else rxrpc_destroy_call(&call->destroyer); } /* * Make sure that all calls are gone from a network namespace. To reach this * point, any open UDP sockets in that namespace must have been closed, so any * outstanding calls cannot be doing I/O. */ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) { struct rxrpc_call *call; _enter(""); if (!list_empty(&rxnet->calls)) { spin_lock(&rxnet->call_lock); while (!list_empty(&rxnet->calls)) { call = list_entry(rxnet->calls.next, struct rxrpc_call, link); _debug("Zapping call %p", call); rxrpc_see_call(call, rxrpc_call_see_zap); list_del_init(&call->link); pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", call, refcount_read(&call->ref), rxrpc_call_states[__rxrpc_call_state(call)], call->flags, call->events); spin_unlock(&rxnet->call_lock); cond_resched(); spin_lock(&rxnet->call_lock); } spin_unlock(&rxnet->call_lock); } atomic_dec(&rxnet->nr_calls); wait_var_event(&rxnet->nr_calls, !atomic_read(&rxnet->nr_calls)); } /** * rxrpc_kernel_query_call_security - Query call's security parameters * @call: The call to query * @_service_id: Where to return the service ID * @_enctype: Where to return the "encoding type" * * This queries the security parameters of a call, setting *@_service_id and * *@_enctype and returning the security class. * * Return: The security class protocol number. */ u8 rxrpc_kernel_query_call_security(struct rxrpc_call *call, u16 *_service_id, u32 *_enctype) { *_service_id = call->dest_srx.srx_service; *_enctype = call->security_enctype; return call->security_ix; } EXPORT_SYMBOL(rxrpc_kernel_query_call_security);
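The user_call_ID bookkeeping in rxrpc_find_call_by_user_ID() and rxrpc_new_client_call() above follows the standard rbtree descend-then-link pattern. The self-contained sketch below is an illustration only; the types and names are not from this file.

/* Illustrative sketch: insert a node unless its key already exists, mirroring
 * the descend/link/colour sequence used for rx->calls above.
 */
#include <linux/rbtree.h>

struct example_node {
	struct rb_node node;
	unsigned long key;
};

static bool example_insert(struct rb_root *root, struct example_node *new)
{
	struct rb_node **pp = &root->rb_node, *parent = NULL;

	while (*pp) {
		struct example_node *x = rb_entry(*pp, struct example_node, node);

		parent = *pp;
		if (new->key < x->key)
			pp = &(*pp)->rb_left;
		else if (new->key > x->key)
			pp = &(*pp)->rb_right;
		else
			return false;	/* duplicate key */
	}

	rb_link_node(&new->node, parent, pp);
	rb_insert_color(&new->node, root);
	return true;
}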
// SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/nospec.h> #include <linux/io_uring.h> #include <uapi/linux/io_uring.h> #include "filetable.h" #include "sqpoll.h" #include "fdinfo.h" #include "cancel.h" #include "rsrc.h" #include "opdef.h" #ifdef CONFIG_NET_RX_BUSY_POLL static __cold void common_tracking_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m, const char *tracking_strategy) { seq_puts(m, "NAPI:\tenabled\n"); seq_printf(m, "napi tracking:\t%s\n", tracking_strategy); seq_printf(m, "napi_busy_poll_dt:\t%llu\n", ctx->napi_busy_poll_dt); if (ctx->napi_prefer_busy_poll) seq_puts(m, "napi_prefer_busy_poll:\ttrue\n"); else seq_puts(m, "napi_prefer_busy_poll:\tfalse\n"); } static __cold void napi_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) { unsigned int mode = READ_ONCE(ctx->napi_track_mode); switch (mode) { case IO_URING_NAPI_TRACKING_INACTIVE: seq_puts(m, "NAPI:\tdisabled\n"); break; case IO_URING_NAPI_TRACKING_DYNAMIC: common_tracking_show_fdinfo(ctx, m, "dynamic"); break; case IO_URING_NAPI_TRACKING_STATIC: common_tracking_show_fdinfo(ctx, m, "static"); break; default: seq_printf(m, "NAPI:\tunknown mode (%u)\n", mode); } } #else static inline void napi_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) { } #endif static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) { struct io_overflow_cqe *ocqe; struct io_rings *r = ctx->rings; unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1; unsigned int sq_head = READ_ONCE(r->sq.head); unsigned int sq_tail = READ_ONCE(r->sq.tail); unsigned int cq_head = READ_ONCE(r->cq.head); unsigned int cq_tail = READ_ONCE(r->cq.tail); unsigned int sq_shift = 0; unsigned int sq_entries; int sq_pid = -1, sq_cpu = -1; u64 sq_total_time = 0, sq_work_time = 0; unsigned int i; if (ctx->flags & IORING_SETUP_SQE128) sq_shift = 1; /* * we may get imprecise sqe and cqe info if uring is actively running * since we get cached_sq_head and cached_cq_tail without uring_lock * and sq_tail and cq_head are changed by userspace. But it's ok since * we usually only use this info when the ring is stuck.
*/ seq_printf(m, "SqMask:\t0x%x\n", sq_mask); seq_printf(m, "SqHead:\t%u\n", sq_head); seq_printf(m, "SqTail:\t%u\n", sq_tail); seq_printf(m, "CachedSqHead:\t%u\n", data_race(ctx->cached_sq_head)); seq_printf(m, "CqMask:\t0x%x\n", cq_mask); seq_printf(m, "CqHead:\t%u\n", cq_head); seq_printf(m, "CqTail:\t%u\n", cq_tail); seq_printf(m, "CachedCqTail:\t%u\n", data_race(ctx->cached_cq_tail)); seq_printf(m, "SQEs:\t%u\n", sq_tail - sq_head); sq_entries = min(sq_tail - sq_head, ctx->sq_entries); for (i = 0; i < sq_entries; i++) { unsigned int entry = i + sq_head; struct io_uring_sqe *sqe; unsigned int sq_idx; bool sqe128 = false; u8 opcode; if (ctx->flags & IORING_SETUP_NO_SQARRAY) sq_idx = entry & sq_mask; else sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]); if (sq_idx > sq_mask) continue; sqe = &ctx->sq_sqes[sq_idx << sq_shift]; opcode = READ_ONCE(sqe->opcode); if (opcode >= IORING_OP_LAST) continue; opcode = array_index_nospec(opcode, IORING_OP_LAST); if (sq_shift) { sqe128 = true; } else if (io_issue_defs[opcode].is_128) { if (!(ctx->flags & IORING_SETUP_SQE_MIXED)) { seq_printf(m, "%5u: invalid sqe, 128B entry on non-mixed sq\n", sq_idx); break; } if ((++sq_head & sq_mask) == 0) { seq_printf(m, "%5u: corrupted sqe, wrapping 128B entry\n", sq_idx); break; } sqe128 = true; } seq_printf(m, "%5u: opcode:%s, fd:%d, flags:%x, off:%llu, " "addr:0x%llx, rw_flags:0x%x, buf_index:%d " "user_data:%llu", sq_idx, io_uring_get_opcode(opcode), sqe->fd, sqe->flags, (unsigned long long) sqe->off, (unsigned long long) sqe->addr, sqe->rw_flags, sqe->buf_index, sqe->user_data); if (sqe128) { u64 *sqeb = (void *) (sqe + 1); int size = sizeof(struct io_uring_sqe) / sizeof(u64); int j; for (j = 0; j < size; j++) { seq_printf(m, ", e%d:0x%llx", j, (unsigned long long) *sqeb); sqeb++; } } seq_printf(m, "\n"); } seq_printf(m, "CQEs:\t%u\n", cq_tail - cq_head); while (cq_head < cq_tail) { struct io_uring_cqe *cqe; bool cqe32 = false; cqe = &r->cqes[(cq_head & cq_mask)]; if (cqe->flags & IORING_CQE_F_32 || ctx->flags & IORING_SETUP_CQE32) cqe32 = true; seq_printf(m, "%5u: user_data:%llu, res:%d, flags:%x", cq_head & cq_mask, cqe->user_data, cqe->res, cqe->flags); if (cqe32) seq_printf(m, ", extra1:%llu, extra2:%llu\n", cqe->big_cqe[0], cqe->big_cqe[1]); seq_printf(m, "\n"); cq_head++; if (cqe32) cq_head++; } if (ctx->flags & IORING_SETUP_SQPOLL) { struct io_sq_data *sq = ctx->sq_data; struct task_struct *tsk; rcu_read_lock(); tsk = rcu_dereference(sq->thread); /* * sq->thread might be NULL if we raced with the sqpoll * thread termination. 
*/ if (tsk) { u64 usec; get_task_struct(tsk); rcu_read_unlock(); usec = io_sq_cpu_usec(tsk); put_task_struct(tsk); sq_pid = sq->task_pid; sq_cpu = sq->sq_cpu; sq_total_time = usec; sq_work_time = sq->work_time; } else { rcu_read_unlock(); } } seq_printf(m, "SqThread:\t%d\n", sq_pid); seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu); seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time); seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time); seq_printf(m, "UserFiles:\t%u\n", ctx->file_table.data.nr); for (i = 0; i < ctx->file_table.data.nr; i++) { struct file *f = NULL; if (ctx->file_table.data.nodes[i]) f = io_slot_file(ctx->file_table.data.nodes[i]); if (f) { seq_printf(m, "%5u: ", i); seq_file_path(m, f, " \t\n\\"); seq_puts(m, "\n"); } } seq_printf(m, "UserBufs:\t%u\n", ctx->buf_table.nr); for (i = 0; i < ctx->buf_table.nr; i++) { struct io_mapped_ubuf *buf = NULL; if (ctx->buf_table.nodes[i]) buf = ctx->buf_table.nodes[i]->buf; if (buf) seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, buf->len); else seq_printf(m, "%5u: <none>\n", i); } seq_puts(m, "PollList:\n"); for (i = 0; i < (1U << ctx->cancel_table.hash_bits); i++) { struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i]; struct io_kiocb *req; hlist_for_each_entry(req, &hb->list, hash_node) seq_printf(m, " op=%d, task_works=%d\n", req->opcode, task_work_pending(req->tctx->task)); } seq_puts(m, "CqOverflowList:\n"); spin_lock(&ctx->completion_lock); list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) { struct io_uring_cqe *cqe = &ocqe->cqe; seq_printf(m, " user_data=%llu, res=%d, flags=%x\n", cqe->user_data, cqe->res, cqe->flags); } spin_unlock(&ctx->completion_lock); napi_show_fdinfo(ctx, m); } /* * Caller holds a reference to the file already, we don't need to do * anything else to get an extra reference. */ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file) { struct io_ring_ctx *ctx = file->private_data; /* * Avoid ABBA deadlock between the seq lock and the io_uring mutex, * since fdinfo case grabs it in the opposite direction of normal use * cases. */ if (mutex_trylock(&ctx->uring_lock)) { __io_uring_show_fdinfo(ctx, m); mutex_unlock(&ctx->uring_lock); } }
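Everything __io_uring_show_fdinfo() prints ends up in procfs, so it can be inspected without issuing any io_uring calls. The snippet below is a userspace illustration only; the helper name is an assumption, and it simply dumps the /proc/<pid>/fdinfo/<fd> text containing the SqMask/SqHead/CQEs lines emitted above.

/* Illustrative userspace sketch: dump the fdinfo text produced above for a
 * given pid and io_uring fd.
 */
#include <stdio.h>

static int dump_uring_fdinfo(int pid, int fd)
{
	char path[64], line[256];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/%d/fdinfo/%d", pid, fd);
	f = fopen(path, "r");
	if (!f)
		return -1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}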
// SPDX-License-Identifier: GPL-2.0 /* * Adiantum length-preserving encryption mode * * Copyright 2018 Google LLC */ /* * Adiantum is a tweakable, length-preserving encryption mode designed for fast * and secure disk encryption, especially on CPUs without dedicated crypto * instructions. Adiantum encrypts each sector using the XChaCha12 stream * cipher, two passes of an ε-almost-∆-universal (ε-∆U) hash function based on * NH and Poly1305, and an invocation of the AES-256 block cipher on a single * 16-byte block. See the paper for details: * * Adiantum: length-preserving encryption for entry-level processors * (https://eprint.iacr.org/2018/720.pdf) * * For flexibility, this implementation also allows other ciphers: * * - Stream cipher: XChaCha12 or XChaCha20 * - Block cipher: any with a 128-bit block size and 256-bit key * * This implementation doesn't currently allow other ε-∆U hash functions, i.e. * HPolyC is not supported.
This is because Adiantum is ~20% faster than HPolyC * but still provably as secure, and also the ε-∆U hash function of HBSH is * formally defined to take two inputs (tweak, message) which makes it difficult * to wrap with the crypto_shash API. Rather, some details need to be handled * here. Nevertheless, if needed in the future, support for other ε-∆U hash * functions could be added here. */ #include <crypto/b128ops.h> #include <crypto/chacha.h> #include <crypto/internal/cipher.h> #include <crypto/internal/hash.h> #include <crypto/internal/poly1305.h> #include <crypto/internal/skcipher.h> #include <crypto/nhpoly1305.h> #include <crypto/scatterwalk.h> #include <linux/module.h> /* * Size of right-hand part of input data, in bytes; also the size of the block * cipher's block size and the hash function's output. */ #define BLOCKCIPHER_BLOCK_SIZE 16 /* Size of the block cipher key (K_E) in bytes */ #define BLOCKCIPHER_KEY_SIZE 32 /* Size of the hash key (K_H) in bytes */ #define HASH_KEY_SIZE (POLY1305_BLOCK_SIZE + NHPOLY1305_KEY_SIZE) /* * The specification allows variable-length tweaks, but Linux's crypto API * currently only allows algorithms to support a single length. The "natural" * tweak length for Adiantum is 16, since that fits into one Poly1305 block for * the best performance. But longer tweaks are useful for fscrypt, to avoid * needing to derive per-file keys. So instead we use two blocks, or 32 bytes. */ #define TWEAK_SIZE 32 struct adiantum_instance_ctx { struct crypto_skcipher_spawn streamcipher_spawn; struct crypto_cipher_spawn blockcipher_spawn; struct crypto_shash_spawn hash_spawn; }; struct adiantum_tfm_ctx { struct crypto_skcipher *streamcipher; struct crypto_cipher *blockcipher; struct crypto_shash *hash; struct poly1305_core_key header_hash_key; }; struct adiantum_request_ctx { /* * Buffer for right-hand part of data, i.e. * * P_L => P_M => C_M => C_R when encrypting, or * C_R => C_M => P_M => P_L when decrypting. * * Also used to build the IV for the stream cipher. */ union { u8 bytes[XCHACHA_IV_SIZE]; __le32 words[XCHACHA_IV_SIZE / sizeof(__le32)]; le128 bignum; /* interpret as element of Z/(2^{128}Z) */ } rbuf; bool enc; /* true if encrypting, false if decrypting */ /* * The result of the Poly1305 ε-∆U hash function applied to * (bulk length, tweak) */ le128 header_hash; /* Sub-requests, must be last */ union { struct shash_desc hash_desc; struct skcipher_request streamcipher_req; } u; }; /* * Given the XChaCha stream key K_S, derive the block cipher key K_E and the * hash key K_H as follows: * * K_E || K_H || ... = XChaCha(key=K_S, nonce=1||0^191) * * Note that this denotes using bits from the XChaCha keystream, which here we * get indirectly by encrypting a buffer containing all 0's. 
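 *
 * (Added note, derived from the setkey code below: with the sizes used here,
 * the first BLOCKCIPHER_KEY_SIZE (32) bytes of that keystream become K_E,
 * the next POLY1305_BLOCK_SIZE (16) bytes key the Poly1305 header hash, and
 * the remaining NHPOLY1305_KEY_SIZE bytes key NHPoly1305.)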
*/ static int adiantum_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); struct { u8 iv[XCHACHA_IV_SIZE]; u8 derived_keys[BLOCKCIPHER_KEY_SIZE + HASH_KEY_SIZE]; struct scatterlist sg; struct crypto_wait wait; struct skcipher_request req; /* must be last */ } *data; u8 *keyp; int err; /* Set the stream cipher key (K_S) */ crypto_skcipher_clear_flags(tctx->streamcipher, CRYPTO_TFM_REQ_MASK); crypto_skcipher_set_flags(tctx->streamcipher, crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); err = crypto_skcipher_setkey(tctx->streamcipher, key, keylen); if (err) return err; /* Derive the subkeys */ data = kzalloc(sizeof(*data) + crypto_skcipher_reqsize(tctx->streamcipher), GFP_KERNEL); if (!data) return -ENOMEM; data->iv[0] = 1; sg_init_one(&data->sg, data->derived_keys, sizeof(data->derived_keys)); crypto_init_wait(&data->wait); skcipher_request_set_tfm(&data->req, tctx->streamcipher); skcipher_request_set_callback(&data->req, CRYPTO_TFM_REQ_MAY_SLEEP | CRYPTO_TFM_REQ_MAY_BACKLOG, crypto_req_done, &data->wait); skcipher_request_set_crypt(&data->req, &data->sg, &data->sg, sizeof(data->derived_keys), data->iv); err = crypto_wait_req(crypto_skcipher_encrypt(&data->req), &data->wait); if (err) goto out; keyp = data->derived_keys; /* Set the block cipher key (K_E) */ crypto_cipher_clear_flags(tctx->blockcipher, CRYPTO_TFM_REQ_MASK); crypto_cipher_set_flags(tctx->blockcipher, crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); err = crypto_cipher_setkey(tctx->blockcipher, keyp, BLOCKCIPHER_KEY_SIZE); if (err) goto out; keyp += BLOCKCIPHER_KEY_SIZE; /* Set the hash key (K_H) */ poly1305_core_setkey(&tctx->header_hash_key, keyp); keyp += POLY1305_BLOCK_SIZE; crypto_shash_clear_flags(tctx->hash, CRYPTO_TFM_REQ_MASK); crypto_shash_set_flags(tctx->hash, crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); err = crypto_shash_setkey(tctx->hash, keyp, NHPOLY1305_KEY_SIZE); keyp += NHPOLY1305_KEY_SIZE; WARN_ON(keyp != &data->derived_keys[ARRAY_SIZE(data->derived_keys)]); out: kfree_sensitive(data); return err; } /* Addition in Z/(2^{128}Z) */ static inline void le128_add(le128 *r, const le128 *v1, const le128 *v2) { u64 x = le64_to_cpu(v1->b); u64 y = le64_to_cpu(v2->b); r->b = cpu_to_le64(x + y); r->a = cpu_to_le64(le64_to_cpu(v1->a) + le64_to_cpu(v2->a) + (x + y < x)); } /* Subtraction in Z/(2^{128}Z) */ static inline void le128_sub(le128 *r, const le128 *v1, const le128 *v2) { u64 x = le64_to_cpu(v1->b); u64 y = le64_to_cpu(v2->b); r->b = cpu_to_le64(x - y); r->a = cpu_to_le64(le64_to_cpu(v1->a) - le64_to_cpu(v2->a) - (x - y > x)); } /* * Apply the Poly1305 ε-∆U hash function to (bulk length, tweak) and save the * result to rctx->header_hash. This is the calculation * * H_T ← Poly1305_{K_T}(bin_{128}(|L|) || T) * * from the procedure in section 6.4 of the Adiantum paper. The resulting value * is reused in both the first and second hash steps. Specifically, it's added * to the result of an independently keyed ε-∆U hash function (for equal length * inputs only) taken over the left-hand part (the "bulk") of the message, to * give the overall Adiantum hash of the (tweak, left-hand part) pair. 
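 *
 * (Added note: concretely, the code below computes the overall hash as
 * header_hash + NHPoly1305(bulk) in Z/(2^{128}Z), using le128_add() above
 * for the addition in the first hash step and le128_sub() for the
 * corresponding subtraction in the second.)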
*/ static void adiantum_hash_header(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); struct adiantum_request_ctx *rctx = skcipher_request_ctx(req); const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE; struct { __le64 message_bits; __le64 padding; } header = { .message_bits = cpu_to_le64((u64)bulk_len * 8) }; struct poly1305_state state; poly1305_core_init(&state); BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0); poly1305_core_blocks(&state, &tctx->header_hash_key, &header, sizeof(header) / POLY1305_BLOCK_SIZE, 1); BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0); poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv, TWEAK_SIZE / POLY1305_BLOCK_SIZE, 1); poly1305_core_emit(&state, NULL, &rctx->header_hash); } /* Hash the left-hand part (the "bulk") of the message using NHPoly1305 */ static int adiantum_hash_message(struct skcipher_request *req, struct scatterlist *sgl, unsigned int nents, le128 *digest) { struct adiantum_request_ctx *rctx = skcipher_request_ctx(req); const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE; struct shash_desc *hash_desc = &rctx->u.hash_desc; struct sg_mapping_iter miter; unsigned int i, n; int err; err = crypto_shash_init(hash_desc); if (err) return err; sg_miter_start(&miter, sgl, nents, SG_MITER_FROM_SG | SG_MITER_ATOMIC); for (i = 0; i < bulk_len; i += n) { sg_miter_next(&miter); n = min_t(unsigned int, miter.length, bulk_len - i); err = crypto_shash_update(hash_desc, miter.addr, n); if (err) break; } sg_miter_stop(&miter); if (err) return err; return crypto_shash_final(hash_desc, (u8 *)digest); } /* Continue Adiantum encryption/decryption after the stream cipher step */ static int adiantum_finish(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); struct adiantum_request_ctx *rctx = skcipher_request_ctx(req); const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE; struct scatterlist *dst = req->dst; const unsigned int dst_nents = sg_nents(dst); le128 digest; int err; /* If decrypting, decrypt C_M with the block cipher to get P_M */ if (!rctx->enc) crypto_cipher_decrypt_one(tctx->blockcipher, rctx->rbuf.bytes, rctx->rbuf.bytes); /* * Second hash step * enc: C_R = C_M - H_{K_H}(T, C_L) * dec: P_R = P_M - H_{K_H}(T, P_L) */ rctx->u.hash_desc.tfm = tctx->hash; le128_sub(&rctx->rbuf.bignum, &rctx->rbuf.bignum, &rctx->header_hash); if (dst_nents == 1 && dst->offset + req->cryptlen <= PAGE_SIZE) { /* Fast path for single-page destination */ struct page *page = sg_page(dst); void *virt = kmap_local_page(page) + dst->offset; err = crypto_shash_digest(&rctx->u.hash_desc, virt, bulk_len, (u8 *)&digest); if (err) { kunmap_local(virt); return err; } le128_sub(&rctx->rbuf.bignum, &rctx->rbuf.bignum, &digest); memcpy(virt + bulk_len, &rctx->rbuf.bignum, sizeof(le128)); flush_dcache_page(page); kunmap_local(virt); } else { /* Slow path that works for any destination scatterlist */ err = adiantum_hash_message(req, dst, dst_nents, &digest); if (err) return err; le128_sub(&rctx->rbuf.bignum, &rctx->rbuf.bignum, &digest); scatterwalk_map_and_copy(&rctx->rbuf.bignum, dst, bulk_len, sizeof(le128), 1); } return 0; } static void adiantum_streamcipher_done(void *data, int err) { struct skcipher_request *req = data; if (!err) err = adiantum_finish(req); skcipher_request_complete(req, err); } static int 
adiantum_crypt(struct skcipher_request *req, bool enc) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); struct adiantum_request_ctx *rctx = skcipher_request_ctx(req); const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE; struct scatterlist *src = req->src; const unsigned int src_nents = sg_nents(src); unsigned int stream_len; le128 digest; int err; if (req->cryptlen < BLOCKCIPHER_BLOCK_SIZE) return -EINVAL; rctx->enc = enc; /* * First hash step * enc: P_M = P_R + H_{K_H}(T, P_L) * dec: C_M = C_R + H_{K_H}(T, C_L) */ adiantum_hash_header(req); rctx->u.hash_desc.tfm = tctx->hash; if (src_nents == 1 && src->offset + req->cryptlen <= PAGE_SIZE) { /* Fast path for single-page source */ void *virt = kmap_local_page(sg_page(src)) + src->offset; err = crypto_shash_digest(&rctx->u.hash_desc, virt, bulk_len, (u8 *)&digest); memcpy(&rctx->rbuf.bignum, virt + bulk_len, sizeof(le128)); kunmap_local(virt); } else { /* Slow path that works for any source scatterlist */ err = adiantum_hash_message(req, src, src_nents, &digest); scatterwalk_map_and_copy(&rctx->rbuf.bignum, src, bulk_len, sizeof(le128), 0); } if (err) return err; le128_add(&rctx->rbuf.bignum, &rctx->rbuf.bignum, &rctx->header_hash); le128_add(&rctx->rbuf.bignum, &rctx->rbuf.bignum, &digest); /* If encrypting, encrypt P_M with the block cipher to get C_M */ if (enc) crypto_cipher_encrypt_one(tctx->blockcipher, rctx->rbuf.bytes, rctx->rbuf.bytes); /* Initialize the rest of the XChaCha IV (first part is C_M) */ BUILD_BUG_ON(BLOCKCIPHER_BLOCK_SIZE != 16); BUILD_BUG_ON(XCHACHA_IV_SIZE != 32); /* nonce || stream position */ rctx->rbuf.words[4] = cpu_to_le32(1); rctx->rbuf.words[5] = 0; rctx->rbuf.words[6] = 0; rctx->rbuf.words[7] = 0; /* * XChaCha needs to be done on all the data except the last 16 bytes; * for disk encryption that usually means 4080 or 496 bytes. But ChaCha * implementations tend to be most efficient when passed a whole number * of 64-byte ChaCha blocks, or sometimes even a multiple of 256 bytes. * And here it doesn't matter whether the last 16 bytes are written to, * as the second hash step will overwrite them. Thus, round the XChaCha * length up to the next 64-byte boundary if possible. 
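 *
 * (Added example: a 4096-byte sector gives bulk_len = 4080, and
 * round_up(4080, 64) = 4096 <= cryptlen, so all 4096 bytes are passed to
 * XChaCha; likewise a 512-byte sector's 496-byte bulk is rounded up to 512.
 * Only when rounding up would exceed cryptlen does stream_len stay at
 * bulk_len.)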
*/ stream_len = bulk_len; if (round_up(stream_len, CHACHA_BLOCK_SIZE) <= req->cryptlen) stream_len = round_up(stream_len, CHACHA_BLOCK_SIZE); skcipher_request_set_tfm(&rctx->u.streamcipher_req, tctx->streamcipher); skcipher_request_set_crypt(&rctx->u.streamcipher_req, req->src, req->dst, stream_len, &rctx->rbuf); skcipher_request_set_callback(&rctx->u.streamcipher_req, req->base.flags, adiantum_streamcipher_done, req); return crypto_skcipher_encrypt(&rctx->u.streamcipher_req) ?: adiantum_finish(req); } static int adiantum_encrypt(struct skcipher_request *req) { return adiantum_crypt(req, true); } static int adiantum_decrypt(struct skcipher_request *req) { return adiantum_crypt(req, false); } static int adiantum_init_tfm(struct crypto_skcipher *tfm) { struct skcipher_instance *inst = skcipher_alg_instance(tfm); struct adiantum_instance_ctx *ictx = skcipher_instance_ctx(inst); struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); struct crypto_skcipher *streamcipher; struct crypto_cipher *blockcipher; struct crypto_shash *hash; unsigned int subreq_size; int err; streamcipher = crypto_spawn_skcipher(&ictx->streamcipher_spawn); if (IS_ERR(streamcipher)) return PTR_ERR(streamcipher); blockcipher = crypto_spawn_cipher(&ictx->blockcipher_spawn); if (IS_ERR(blockcipher)) { err = PTR_ERR(blockcipher); goto err_free_streamcipher; } hash = crypto_spawn_shash(&ictx->hash_spawn); if (IS_ERR(hash)) { err = PTR_ERR(hash); goto err_free_blockcipher; } tctx->streamcipher = streamcipher; tctx->blockcipher = blockcipher; tctx->hash = hash; BUILD_BUG_ON(offsetofend(struct adiantum_request_ctx, u) != sizeof(struct adiantum_request_ctx)); subreq_size = max(sizeof_field(struct adiantum_request_ctx, u.hash_desc) + crypto_shash_descsize(hash), sizeof_field(struct adiantum_request_ctx, u.streamcipher_req) + crypto_skcipher_reqsize(streamcipher)); crypto_skcipher_set_reqsize(tfm, offsetof(struct adiantum_request_ctx, u) + subreq_size); return 0; err_free_blockcipher: crypto_free_cipher(blockcipher); err_free_streamcipher: crypto_free_skcipher(streamcipher); return err; } static void adiantum_exit_tfm(struct crypto_skcipher *tfm) { struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); crypto_free_skcipher(tctx->streamcipher); crypto_free_cipher(tctx->blockcipher); crypto_free_shash(tctx->hash); } static void adiantum_free_instance(struct skcipher_instance *inst) { struct adiantum_instance_ctx *ictx = skcipher_instance_ctx(inst); crypto_drop_skcipher(&ictx->streamcipher_spawn); crypto_drop_cipher(&ictx->blockcipher_spawn); crypto_drop_shash(&ictx->hash_spawn); kfree(inst); } /* * Check for a supported set of inner algorithms. * See the comment at the beginning of this file. 
*/ static bool adiantum_supported_algorithms(struct skcipher_alg_common *streamcipher_alg, struct crypto_alg *blockcipher_alg, struct shash_alg *hash_alg) { if (strcmp(streamcipher_alg->base.cra_name, "xchacha12") != 0 && strcmp(streamcipher_alg->base.cra_name, "xchacha20") != 0) return false; if (blockcipher_alg->cra_cipher.cia_min_keysize > BLOCKCIPHER_KEY_SIZE || blockcipher_alg->cra_cipher.cia_max_keysize < BLOCKCIPHER_KEY_SIZE) return false; if (blockcipher_alg->cra_blocksize != BLOCKCIPHER_BLOCK_SIZE) return false; if (strcmp(hash_alg->base.cra_name, "nhpoly1305") != 0) return false; return true; } static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb) { u32 mask; const char *nhpoly1305_name; struct skcipher_instance *inst; struct adiantum_instance_ctx *ictx; struct skcipher_alg_common *streamcipher_alg; struct crypto_alg *blockcipher_alg; struct shash_alg *hash_alg; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask); if (err) return err; inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL); if (!inst) return -ENOMEM; ictx = skcipher_instance_ctx(inst); /* Stream cipher, e.g. "xchacha12" */ err = crypto_grab_skcipher(&ictx->streamcipher_spawn, skcipher_crypto_instance(inst), crypto_attr_alg_name(tb[1]), 0, mask); if (err) goto err_free_inst; streamcipher_alg = crypto_spawn_skcipher_alg_common(&ictx->streamcipher_spawn); /* Block cipher, e.g. "aes" */ err = crypto_grab_cipher(&ictx->blockcipher_spawn, skcipher_crypto_instance(inst), crypto_attr_alg_name(tb[2]), 0, mask); if (err) goto err_free_inst; blockcipher_alg = crypto_spawn_cipher_alg(&ictx->blockcipher_spawn); /* NHPoly1305 ε-∆U hash function */ nhpoly1305_name = crypto_attr_alg_name(tb[3]); if (nhpoly1305_name == ERR_PTR(-ENOENT)) nhpoly1305_name = "nhpoly1305"; err = crypto_grab_shash(&ictx->hash_spawn, skcipher_crypto_instance(inst), nhpoly1305_name, 0, mask); if (err) goto err_free_inst; hash_alg = crypto_spawn_shash_alg(&ictx->hash_spawn); /* Check the set of algorithms */ if (!adiantum_supported_algorithms(streamcipher_alg, blockcipher_alg, hash_alg)) { pr_warn("Unsupported Adiantum instantiation: (%s,%s,%s)\n", streamcipher_alg->base.cra_name, blockcipher_alg->cra_name, hash_alg->base.cra_name); err = -EINVAL; goto err_free_inst; } /* Instance fields */ err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "adiantum(%s,%s)", streamcipher_alg->base.cra_name, blockcipher_alg->cra_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "adiantum(%s,%s,%s)", streamcipher_alg->base.cra_driver_name, blockcipher_alg->cra_driver_name, hash_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; inst->alg.base.cra_blocksize = BLOCKCIPHER_BLOCK_SIZE; inst->alg.base.cra_ctxsize = sizeof(struct adiantum_tfm_ctx); inst->alg.base.cra_alignmask = streamcipher_alg->base.cra_alignmask; /* * The block cipher is only invoked once per message, so for long * messages (e.g. sectors for disk encryption) its performance doesn't * matter as much as that of the stream cipher and hash function. Thus, * weigh the block cipher's ->cra_priority less. 
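 *
 * (Added example with hypothetical priorities: a stream cipher at 300, a
 * hash at 300 and a block cipher at 100 would give
 * (4*300 + 2*300 + 100) / 7 = 271 for the instance.)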
*/ inst->alg.base.cra_priority = (4 * streamcipher_alg->base.cra_priority + 2 * hash_alg->base.cra_priority + blockcipher_alg->cra_priority) / 7; inst->alg.setkey = adiantum_setkey; inst->alg.encrypt = adiantum_encrypt; inst->alg.decrypt = adiantum_decrypt; inst->alg.init = adiantum_init_tfm; inst->alg.exit = adiantum_exit_tfm; inst->alg.min_keysize = streamcipher_alg->min_keysize; inst->alg.max_keysize = streamcipher_alg->max_keysize; inst->alg.ivsize = TWEAK_SIZE; inst->free = adiantum_free_instance; err = skcipher_register_instance(tmpl, inst); if (err) { err_free_inst: adiantum_free_instance(inst); } return err; } /* adiantum(streamcipher_name, blockcipher_name [, nhpoly1305_name]) */ static struct crypto_template adiantum_tmpl = { .name = "adiantum", .create = adiantum_create, .module = THIS_MODULE, }; static int __init adiantum_module_init(void) { return crypto_register_template(&adiantum_tmpl); } static void __exit adiantum_module_exit(void) { crypto_unregister_template(&adiantum_tmpl); } module_init(adiantum_module_init); module_exit(adiantum_module_exit); MODULE_DESCRIPTION("Adiantum length-preserving encryption mode"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>"); MODULE_ALIAS_CRYPTO("adiantum"); MODULE_IMPORT_NS("CRYPTO_INTERNAL");
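/*
 * Illustrative addition (not part of the kernel source above): a minimal
 * standalone sketch of the 128-bit little-endian ring arithmetic that
 * le128_add()/le128_sub() implement for Adiantum's hashing steps, checking
 * that subtracting a value after adding it restores the original. Plain
 * uint64_t pairs stand in for the kernel's le128 type.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct u128 { uint64_t lo, hi; };	/* lo = low 64 bits, hi = high 64 bits */

/* r = a + b (mod 2^128), carrying from the low into the high half */
static struct u128 add128(struct u128 a, struct u128 b)
{
	struct u128 r = { a.lo + b.lo, a.hi + b.hi };

	if (r.lo < a.lo)	/* carry out of the low half */
		r.hi++;
	return r;
}

/* r = a - b (mod 2^128), borrowing from the high half when needed */
static struct u128 sub128(struct u128 a, struct u128 b)
{
	struct u128 r = { a.lo - b.lo, a.hi - b.hi };

	if (a.lo < b.lo)	/* borrow from the high half */
		r.hi--;
	return r;
}

int main(void)
{
	struct u128 msg = { 0x0123456789abcdefULL, 0x0f0e0d0c0b0a0908ULL };
	struct u128 hash = { 0xffffffffffffffffULL, 0x0000000000000001ULL };
	struct u128 sum = add128(msg, hash);
	struct u128 back = sub128(sum, hash);

	assert(back.lo == msg.lo && back.hi == msg.hi);
	printf("add/sub round-trip OK: %016llx%016llx\n",
	       (unsigned long long)back.hi, (unsigned long long)back.lo);
	return 0;
}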
// SPDX-License-Identifier: GPL-2.0-or-later /* * Internet Control Message Protocol (ICMPv6) * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Based on net/ipv4/icmp.c * * RFC 1885 */ /* * Changes: * * Andi Kleen : exception handling * Andi Kleen add rate limits. never reply to a icmp. * add more length checks and other fixes. * yoshfuji : ensure to sent parameter problem for * fragments. * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
* Randy Dunlap and * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data */ #define pr_fmt(fmt) "IPv6: " fmt #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/netfilter.h> #include <linux/slab.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/icmpv6.h> #include <net/ip.h> #include <net/sock.h> #include <net/ipv6.h> #include <net/ip6_checksum.h> #include <net/ping.h> #include <net/protocol.h> #include <net/raw.h> #include <net/rawv6.h> #include <net/seg6.h> #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/icmp.h> #include <net/xfrm.h> #include <net/inet_common.h> #include <net/dsfield.h> #include <net/l3mdev.h> #include <linux/uaccess.h> static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk); static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */ struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset); struct net *net = dev_net_rcu(skb->dev); if (type == ICMPV6_PKT_TOOBIG) ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); else if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) ping_err(skb, offset, ntohl(info)); return 0; } static int icmpv6_rcv(struct sk_buff *skb); static const struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, .err_handler = icmpv6_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; /* Called with BH disabled */ static struct sock *icmpv6_xmit_lock(struct net *net) { struct sock *sk; sk = this_cpu_read(ipv6_icmp_sk); if (unlikely(!spin_trylock(&sk->sk_lock.slock))) { /* This can happen if the output path (f.e. SIT or * ip6ip6 tunnel) signals dst_link_failure() for an * outgoing ICMP6 packet. */ return NULL; } sock_net_set(sk, net); return sk; } static void icmpv6_xmit_unlock(struct sock *sk) { sock_net_set(sk, &init_net); spin_unlock(&sk->sk_lock.slock); } /* * Figure out, may we reply to this packet with icmp error. * * We do not reply, if: * - it was icmp error message. * - it is truncated, so that it is known, that protocol is ICMPV6 * (i.e. in the middle of some exthdr) * * --ANK (980726) */ static bool is_ineligible(const struct sk_buff *skb) { int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; int len = skb->len - ptr; __u8 nexthdr = ipv6_hdr(skb)->nexthdr; __be16 frag_off; if (len < 0) return true; ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off); if (ptr < 0) return false; if (nexthdr == IPPROTO_ICMPV6) { u8 _type, *tp; tp = skb_header_pointer(skb, ptr+offsetof(struct icmp6hdr, icmp6_type), sizeof(_type), &_type); /* Based on RFC 8200, Section 4.5 Fragment Header, return * false if this is a fragment packet with no icmp header info. */ if (!tp && frag_off != 0) return false; else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK)) return true; } return false; } static bool icmpv6_mask_allow(struct net *net, int type) { if (type > ICMPV6_MSG_MAX) return true; /* Limit if icmp type is set in ratemask. 
*/ if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask)) return true; return false; } static bool icmpv6_global_allow(struct net *net, int type, bool *apply_ratelimit) { if (icmpv6_mask_allow(net, type)) return true; if (icmp_global_allow(net)) { *apply_ratelimit = true; return true; } __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL); return false; } /* * Check the ICMP output rate limit */ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, struct flowi6 *fl6, bool apply_ratelimit) { struct net *net = sock_net(sk); struct net_device *dev; struct dst_entry *dst; bool res = false; if (!apply_ratelimit) return true; /* * Look up the output route. * XXX: perhaps the expire for routing entries cloned by * this lookup should be more aggressive (not longer than timeout). */ dst = ip6_route_output(net, sk, fl6); rcu_read_lock(); dev = dst_dev_rcu(dst); if (dst->error) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); } else if (dev && (dev->flags & IFF_LOOPBACK)) { res = true; } else { struct rt6_info *rt = dst_rt6_info(dst); int tmo = net->ipv6.sysctl.icmpv6_time; struct inet_peer *peer; /* Give more bandwidth to wider prefixes. */ if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr); res = inet_peer_xrlim_allow(peer, tmo); } rcu_read_unlock(); if (!res) __ICMP6_INC_STATS(net, NULL, ICMP6_MIB_RATELIMITHOST); else icmp_global_consume(net); dst_release(dst); return res; } static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type, struct flowi6 *fl6) { struct net *net = sock_net(sk); struct dst_entry *dst; bool res = false; dst = ip6_route_output(net, sk, fl6); if (!dst->error) { struct rt6_info *rt = dst_rt6_info(dst); struct in6_addr prefsrc; rt6_get_prefsrc(rt, &prefsrc); res = !ipv6_addr_any(&prefsrc); } dst_release(dst); return res; } /* * an inline helper for the "simple" if statement below * checks if parameter problem report is caused by an * unrecognized IPv6 option that has the Option Type * highest-order two bits set to 10 */ static bool opt_unrec(struct sk_buff *skb, __u32 offset) { u8 _optval, *op; offset += skb_network_offset(skb); op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval); if (!op) return true; return (*op & 0xC0) == 0x80; } void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len) { struct sk_buff *skb; struct icmp6hdr *icmp6h; skb = skb_peek(&sk->sk_write_queue); if (!skb) return; icmp6h = icmp6_hdr(skb); memcpy(icmp6h, thdr, sizeof(struct icmp6hdr)); icmp6h->icmp6_cksum = 0; if (skb_queue_len(&sk->sk_write_queue) == 1) { skb->csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), skb->csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, len, fl6->flowi6_proto, skb->csum); } else { __wsum tmp_csum = 0; skb_queue_walk(&sk->sk_write_queue, skb) { tmp_csum = csum_add(tmp_csum, skb->csum); } tmp_csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), tmp_csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, len, fl6->flowi6_proto, tmp_csum); } ip6_push_pending_frames(sk); } struct icmpv6_msg { struct sk_buff *skb; int offset; uint8_t type; }; static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) { struct icmpv6_msg *msg = (struct icmpv6_msg *) from; struct sk_buff *org_skb = msg->skb; __wsum csum; csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset, to, len); skb->csum = csum_block_add(skb->csum, csum, odd); if (!(msg->type & ICMPV6_INFOMSG_MASK)) 
nf_ct_attach(skb, org_skb); return 0; } #if IS_ENABLED(CONFIG_IPV6_MIP6) static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) { struct ipv6hdr *iph = ipv6_hdr(skb); struct ipv6_destopt_hao *hao; int off; if (opt->dsthao) { off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO); if (likely(off >= 0)) { hao = (struct ipv6_destopt_hao *) (skb_network_header(skb) + off); swap(iph->saddr, hao->addr); } } } #else static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {} #endif static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, struct sock *sk, struct flowi6 *fl6) { struct dst_entry *dst, *dst2; struct flowi6 fl2; int err; err = ip6_dst_lookup(net, sk, &dst, fl6); if (err) return ERR_PTR(err); /* * We won't send icmp if the destination is known * anycast unless we need to treat anycast as unicast. */ if (!READ_ONCE(net->ipv6.sysctl.icmpv6_error_anycast_as_unicast) && ipv6_anycast_destination(dst, &fl6->daddr)) { net_dbg_ratelimited("icmp6_send: acast source\n"); dst_release(dst); return ERR_PTR(-EINVAL); } /* No need to clone since we're just using its address. */ dst2 = dst; dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0); if (!IS_ERR(dst)) { if (dst != dst2) return dst; } else { if (PTR_ERR(dst) == -EPERM) dst = NULL; else return dst; } err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6); if (err) goto relookup_failed; err = ip6_dst_lookup(net, sk, &dst2, &fl2); if (err) goto relookup_failed; dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP); if (!IS_ERR(dst2)) { dst_release(dst); dst = dst2; } else { err = PTR_ERR(dst2); if (err == -EPERM) { dst_release(dst); return dst2; } else goto relookup_failed; } relookup_failed: if (dst) return dst; return ERR_PTR(err); } static struct net_device *icmp6_dev(const struct sk_buff *skb) { struct net_device *dev = skb->dev; /* for local traffic to local address, skb dev is the loopback * device. Check if there is a dst attached to the skb and if so * get the real device index. Same is needed for replies to a link * local address on a device enslaved to an L3 master device */ if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) { const struct rt6_info *rt6 = skb_rt6_info(skb); /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.), * and ip6_null_entry could be set to skb if no route is found. */ if (rt6 && rt6->rt6i_idev) dev = rt6->rt6i_idev->dev; } return dev; } static int icmp6_iif(const struct sk_buff *skb) { return icmp6_dev(skb)->ifindex; } struct icmp6_ext_iio_addr6_subobj { __be16 afi; __be16 reserved; struct in6_addr addr6; }; static unsigned int icmp6_ext_iio_len(void) { return sizeof(struct icmp_extobj_hdr) + /* ifIndex */ sizeof(__be32) + /* Interface Address Sub-Object */ sizeof(struct icmp6_ext_iio_addr6_subobj) + /* Interface Name Sub-Object. Length must be a multiple of 4 * bytes. 
*/ ALIGN(sizeof(struct icmp_ext_iio_name_subobj), 4) + /* MTU */ sizeof(__be32); } static unsigned int icmp6_ext_max_len(u8 ext_objs) { unsigned int ext_max_len; ext_max_len = sizeof(struct icmp_ext_hdr); if (ext_objs & BIT(ICMP_ERR_EXT_IIO_IIF)) ext_max_len += icmp6_ext_iio_len(); return ext_max_len; } static struct in6_addr *icmp6_ext_iio_addr6_find(const struct net_device *dev) { struct inet6_dev *in6_dev; struct inet6_ifaddr *ifa; in6_dev = __in6_dev_get(dev); if (!in6_dev) return NULL; /* It is unclear from RFC 5837 which IP address should be chosen, but * it makes sense to choose a global unicast address. */ list_for_each_entry_rcu(ifa, &in6_dev->addr_list, if_list) { if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DADFAILED)) continue; if (ipv6_addr_type(&ifa->addr) != IPV6_ADDR_UNICAST || ipv6_addr_src_scope(&ifa->addr) != IPV6_ADDR_SCOPE_GLOBAL) continue; return &ifa->addr; } return NULL; } static void icmp6_ext_iio_iif_append(struct net *net, struct sk_buff *skb, int iif) { struct icmp_ext_iio_name_subobj *name_subobj; struct icmp_extobj_hdr *objh; struct net_device *dev; struct in6_addr *addr6; __be32 data; if (!iif) return; /* Add the fields in the order specified by RFC 5837. */ objh = skb_put(skb, sizeof(*objh)); objh->class_num = ICMP_EXT_OBJ_CLASS_IIO; objh->class_type = ICMP_EXT_CTYPE_IIO_ROLE(ICMP_EXT_CTYPE_IIO_ROLE_IIF); data = htonl(iif); skb_put_data(skb, &data, sizeof(__be32)); objh->class_type |= ICMP_EXT_CTYPE_IIO_IFINDEX; rcu_read_lock(); dev = dev_get_by_index_rcu(net, iif); if (!dev) goto out; addr6 = icmp6_ext_iio_addr6_find(dev); if (addr6) { struct icmp6_ext_iio_addr6_subobj *addr6_subobj; addr6_subobj = skb_put_zero(skb, sizeof(*addr6_subobj)); addr6_subobj->afi = htons(ICMP_AFI_IP6); addr6_subobj->addr6 = *addr6; objh->class_type |= ICMP_EXT_CTYPE_IIO_IPADDR; } name_subobj = skb_put_zero(skb, ALIGN(sizeof(*name_subobj), 4)); name_subobj->len = ALIGN(sizeof(*name_subobj), 4); netdev_copy_name(dev, name_subobj->name); objh->class_type |= ICMP_EXT_CTYPE_IIO_NAME; data = htonl(READ_ONCE(dev->mtu)); skb_put_data(skb, &data, sizeof(__be32)); objh->class_type |= ICMP_EXT_CTYPE_IIO_MTU; out: rcu_read_unlock(); objh->length = htons(skb_tail_pointer(skb) - (unsigned char *)objh); } static void icmp6_ext_objs_append(struct net *net, struct sk_buff *skb, u8 ext_objs, int iif) { if (ext_objs & BIT(ICMP_ERR_EXT_IIO_IIF)) icmp6_ext_iio_iif_append(net, skb, iif); } static struct sk_buff * icmp6_ext_append(struct net *net, struct sk_buff *skb_in, struct icmp6hdr *icmp6h, unsigned int room, int iif) { unsigned int payload_len, ext_max_len, ext_len; struct icmp_ext_hdr *ext_hdr; struct sk_buff *skb; u8 ext_objs; int nhoff; switch (icmp6h->icmp6_type) { case ICMPV6_DEST_UNREACH: case ICMPV6_TIME_EXCEED: break; default: return NULL; } /* Do not overwrite existing extensions. This can happen when we * receive an ICMPv4 message with extensions from a tunnel and * translate it to an ICMPv6 message towards an IPv6 host in the * overlay network. 
*/ if (icmp6h->icmp6_datagram_len) return NULL; ext_objs = READ_ONCE(net->ipv6.sysctl.icmpv6_errors_extension_mask); if (!ext_objs) return NULL; ext_max_len = icmp6_ext_max_len(ext_objs); if (ICMP_EXT_ORIG_DGRAM_MIN_LEN + ext_max_len > room) return NULL; skb = skb_clone(skb_in, GFP_ATOMIC); if (!skb) return NULL; nhoff = skb_network_offset(skb); payload_len = min(skb->len - nhoff, ICMP_EXT_ORIG_DGRAM_MIN_LEN); if (!pskb_network_may_pull(skb, payload_len)) goto free_skb; if (pskb_trim(skb, nhoff + ICMP_EXT_ORIG_DGRAM_MIN_LEN) || __skb_put_padto(skb, nhoff + ICMP_EXT_ORIG_DGRAM_MIN_LEN, false)) goto free_skb; if (pskb_expand_head(skb, 0, ext_max_len, GFP_ATOMIC)) goto free_skb; ext_hdr = skb_put_zero(skb, sizeof(*ext_hdr)); ext_hdr->version = ICMP_EXT_VERSION_2; icmp6_ext_objs_append(net, skb, ext_objs, iif); /* Do not send an empty extension structure. */ ext_len = skb_tail_pointer(skb) - (unsigned char *)ext_hdr; if (ext_len == sizeof(*ext_hdr)) goto free_skb; ext_hdr->checksum = ip_compute_csum(ext_hdr, ext_len); /* The length of the original datagram in 64-bit words (RFC 4884). */ icmp6h->icmp6_datagram_len = ICMP_EXT_ORIG_DGRAM_MIN_LEN / sizeof(u64); return skb; free_skb: consume_skb(skb); return NULL; } /* * Send an ICMP message in response to a packet in error */ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, const struct in6_addr *force_saddr, const struct inet6_skb_parm *parm) { struct inet6_dev *idev = NULL; struct ipv6hdr *hdr = ipv6_hdr(skb); struct sock *sk; struct net *net; struct ipv6_pinfo *np; const struct in6_addr *saddr = NULL; bool apply_ratelimit = false; struct sk_buff *ext_skb; struct dst_entry *dst; unsigned int room; struct icmp6hdr tmp_hdr; struct flowi6 fl6; struct icmpv6_msg msg; struct ipcm6_cookie ipc6; int iif = 0; int addr_type = 0; int len; u32 mark; if ((u8 *)hdr < skb->head || (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) return; if (!skb->dev) return; rcu_read_lock(); net = dev_net_rcu(skb->dev); mark = IP6_REPLY_MARK(net, skb->mark); /* * Make sure we respect the rules * i.e. RFC 1885 2.4(e) * Rule (e.1) is enforced by not using icmp6_send * in any code that processes icmp errors. */ addr_type = ipv6_addr_type(&hdr->daddr); if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) || ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr)) saddr = &hdr->daddr; /* * Dest addr check */ if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) { if (type != ICMPV6_PKT_TOOBIG && !(type == ICMPV6_PARAMPROB && code == ICMPV6_UNK_OPTION && (opt_unrec(skb, info)))) goto out; saddr = NULL; } addr_type = ipv6_addr_type(&hdr->saddr); /* * Source addr check */ if (__ipv6_addr_needs_scope_id(addr_type)) { iif = icmp6_iif(skb); } else { /* * The source device is used for looking up which routing table * to use for sending an ICMP error. */ iif = l3mdev_master_ifindex(skb->dev); } /* * Must not send error if the source does not uniquely * identify a single node (RFC2463 Section 2.4). * We check unspecified / multicast addresses here, * and anycast addresses will be checked later. */ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n", &hdr->saddr, &hdr->daddr); goto out; } /* * Never answer to a ICMP packet. 
*/ if (is_ineligible(skb)) { net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n", &hdr->saddr, &hdr->daddr); goto out; } /* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */ local_bh_disable(); /* Check global sysctl_icmp_msgs_per_sec ratelimit */ if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type, &apply_ratelimit)) goto out_bh_enable; mip6_addr_swap(skb, parm); sk = icmpv6_xmit_lock(net); if (!sk) goto out_bh_enable; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.daddr = hdr->saddr; if (force_saddr) saddr = force_saddr; if (saddr) { fl6.saddr = *saddr; } else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) { /* select a more meaningful saddr from input if */ struct net_device *in_netdev; in_netdev = dev_get_by_index(net, parm->iif); if (in_netdev) { ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr, inet6_sk(sk)->srcprefs, &fl6.saddr); dev_put(in_netdev); } } fl6.flowi6_mark = mark; fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; fl6.flowi6_uid = sock_net_uid(net, NULL); fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL); security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit)) goto out_unlock; tmp_hdr.icmp6_type = type; tmp_hdr.icmp6_code = code; tmp_hdr.icmp6_cksum = 0; tmp_hdr.icmp6_pointer = htonl(info); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) fl6.flowi6_oif = READ_ONCE(np->ucast_oif); ipcm6_init_sk(&ipc6, sk); ipc6.sockc.mark = mark; fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) goto out_unlock; ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); msg.skb = skb; msg.offset = skb_network_offset(skb); msg.type = type; room = IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr); ext_skb = icmp6_ext_append(net, skb, &tmp_hdr, room, parm->iif); if (ext_skb) msg.skb = ext_skb; len = msg.skb->len - msg.offset; len = min_t(unsigned int, len, room); if (len < 0) { net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n", &hdr->saddr, &hdr->daddr); goto out_dst_release; } idev = __in6_dev_get(skb->dev); if (ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), &ipc6, &fl6, dst_rt6_info(dst), MSG_DONTWAIT)) { ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, len + sizeof(struct icmp6hdr)); } out_dst_release: if (ext_skb) consume_skb(ext_skb); dst_release(dst); out_unlock: icmpv6_xmit_unlock(sk); out_bh_enable: local_bh_enable(); out: rcu_read_unlock(); } EXPORT_SYMBOL(icmp6_send); /* Slightly more convenient version of icmp6_send with drop reasons. 
*/ void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos, enum skb_drop_reason reason) { icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb)); kfree_skb_reason(skb, reason); } /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH * if sufficient data bytes are available * @nhs is the size of the tunnel header(s) : * Either an IPv4 header for SIT encap * an IPv4 header + GRE header for GRE encap */ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, unsigned int data_len) { struct in6_addr temp_saddr; struct rt6_info *rt; struct sk_buff *skb2; u32 info = 0; if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8)) return 1; /* RFC 4884 (partial) support for ICMP extensions */ if (data_len < 128 || (data_len & 7) || skb->len < data_len) data_len = 0; skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC); if (!skb2) return 1; skb_dst_drop(skb2); skb_pull(skb2, nhs); skb_reset_network_header(skb2); rt = rt6_lookup(dev_net_rcu(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, skb, 0); if (rt && rt->dst.dev) skb2->dev = rt->dst.dev; ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr); if (data_len) { /* RFC 4884 (partial) support : * insert 0 padding at the end, before the extensions */ __skb_push(skb2, nhs); skb_reset_network_header(skb2); memmove(skb2->data, skb2->data + nhs, data_len - nhs); memset(skb2->data + data_len - nhs, 0, nhs); /* RFC 4884 4.5 : Length is measured in 64-bit words, * and stored in reserved[0] */ info = (data_len/8) << 24; } if (type == ICMP_TIME_EXCEEDED) icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, info, &temp_saddr, IP6CB(skb2)); else icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, info, &temp_saddr, IP6CB(skb2)); if (rt) ip6_rt_put(rt); kfree_skb(skb2); return 0; } EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach); static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) { struct net *net = dev_net_rcu(skb->dev); struct sock *sk; struct inet6_dev *idev; struct ipv6_pinfo *np; const struct in6_addr *saddr = NULL; struct icmp6hdr *icmph = icmp6_hdr(skb); bool apply_ratelimit = false; struct icmp6hdr tmp_hdr; struct flowi6 fl6; struct icmpv6_msg msg; struct dst_entry *dst; struct ipcm6_cookie ipc6; u32 mark = IP6_REPLY_MARK(net, skb->mark); SKB_DR(reason); bool acast; u8 type; if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) && net->ipv6.sysctl.icmpv6_echo_ignore_multicast) return reason; saddr = &ipv6_hdr(skb)->daddr; acast = ipv6_anycast_destination(skb_dst(skb), saddr); if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast) return reason; if (!ipv6_unicast_destination(skb) && !(net->ipv6.sysctl.anycast_src_echo_reply && acast)) saddr = NULL; if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST) type = ICMPV6_EXT_ECHO_REPLY; else type = ICMPV6_ECHO_REPLY; memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); tmp_hdr.icmp6_type = type; memset(&fl6, 0, sizeof(fl6)); if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES) fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb)); fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.daddr = ipv6_hdr(skb)->saddr; if (saddr) fl6.saddr = *saddr; fl6.flowi6_oif = icmp6_iif(skb); fl6.fl6_icmp_type = type; fl6.flowi6_mark = mark; fl6.flowi6_uid = sock_net_uid(net, NULL); security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); local_bh_disable(); sk = icmpv6_xmit_lock(net); if (!sk) goto out_bh_enable; np = inet6_sk(sk); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if 
(!fl6.flowi6_oif) fl6.flowi6_oif = READ_ONCE(np->ucast_oif); if (ip6_dst_lookup(net, sk, &dst, &fl6)) goto out; dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); if (IS_ERR(dst)) goto out; /* Check the ratelimit */ if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY, &apply_ratelimit)) || !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6, apply_ratelimit)) goto out_dst_release; idev = __in6_dev_get(skb->dev); msg.skb = skb; msg.offset = 0; msg.type = type; ipcm6_init_sk(&ipc6, sk); ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb)); ipc6.sockc.mark = mark; if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST) if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr)) goto out_dst_release; if (ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), &ipc6, &fl6, dst_rt6_info(dst), MSG_DONTWAIT)) { __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, skb->len + sizeof(struct icmp6hdr)); reason = SKB_CONSUMED; } out_dst_release: dst_release(dst); out: icmpv6_xmit_unlock(sk); out_bh_enable: local_bh_enable(); return reason; } enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net_rcu(skb->dev); const struct inet6_protocol *ipprot; enum skb_drop_reason reason; int inner_offset; __be16 frag_off; u8 nexthdr; reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr)); if (reason != SKB_NOT_DROPPED_YET) goto out; seg6_icmp_srh(skb, opt); nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr; if (ipv6_ext_hdr(nexthdr)) { /* now skip over extension headers */ inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); if (inner_offset < 0) { SKB_DR_SET(reason, IPV6_BAD_EXTHDR); goto out; } } else { inner_offset = sizeof(struct ipv6hdr); } /* Checkin header including 8 bytes of inner protocol header. */ reason = pskb_may_pull_reason(skb, inner_offset + 8); if (reason != SKB_NOT_DROPPED_YET) goto out; /* BUGGG_FUTURE: we should try to parse exthdrs in this packet. Without this we will not able f.e. to make source routed pmtu discovery. Corresponding argument (opt) to notifiers is already added. 
--ANK (980726) */ ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot && ipprot->err_handler) ipprot->err_handler(skb, opt, type, code, inner_offset, info); raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info); return SKB_CONSUMED; out: __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return reason; } /* * Handle icmp messages */ static int icmpv6_rcv(struct sk_buff *skb) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct net *net = dev_net_rcu(skb->dev); struct net_device *dev = icmp6_dev(skb); struct inet6_dev *idev = __in6_dev_get(dev); const struct in6_addr *saddr, *daddr; struct icmp6hdr *hdr; u8 type; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); int nh; if (!(sp && sp->xvec[sp->len - 1]->props.flags & XFRM_STATE_ICMP)) { reason = SKB_DROP_REASON_XFRM_POLICY; goto drop_no_count; } if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr))) goto drop_no_count; nh = skb_network_offset(skb); skb_set_network_header(skb, sizeof(*hdr)); if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN, skb)) { reason = SKB_DROP_REASON_XFRM_POLICY; goto drop_no_count; } skb_set_network_header(skb, nh); } __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS); saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) { net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n", saddr, daddr); goto csum_error; } if (!pskb_pull(skb, sizeof(*hdr))) goto discard_it; hdr = icmp6_hdr(skb); type = hdr->icmp6_type; ICMP6MSGIN_INC_STATS(dev_net_rcu(dev), idev, type); switch (type) { case ICMPV6_ECHO_REQUEST: if (!net->ipv6.sysctl.icmpv6_echo_ignore_all) reason = icmpv6_echo_reply(skb); break; case ICMPV6_EXT_ECHO_REQUEST: if (!net->ipv6.sysctl.icmpv6_echo_ignore_all && READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe)) reason = icmpv6_echo_reply(skb); break; case ICMPV6_ECHO_REPLY: case ICMPV6_EXT_ECHO_REPLY: ping_rcv(skb); return 0; case ICMPV6_PKT_TOOBIG: /* BUGGG_FUTURE: if packet contains rthdr, we cannot update standard destination cache. Seems, only "advanced" destination cache will allow to solve this problem --ANK (980726) */ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto discard_it; hdr = icmp6_hdr(skb); /* to notify */ fallthrough; case ICMPV6_DEST_UNREACH: case ICMPV6_TIME_EXCEED: case ICMPV6_PARAMPROB: reason = icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu); break; case NDISC_ROUTER_SOLICITATION: case NDISC_ROUTER_ADVERTISEMENT: case NDISC_NEIGHBOUR_SOLICITATION: case NDISC_NEIGHBOUR_ADVERTISEMENT: case NDISC_REDIRECT: reason = ndisc_rcv(skb); break; case ICMPV6_MGM_QUERY: igmp6_event_query(skb); return 0; case ICMPV6_MGM_REPORT: igmp6_event_report(skb); return 0; case ICMPV6_MGM_REDUCTION: case ICMPV6_NI_QUERY: case ICMPV6_NI_REPLY: case ICMPV6_MLD2_REPORT: case ICMPV6_DHAAD_REQUEST: case ICMPV6_DHAAD_REPLY: case ICMPV6_MOBILE_PREFIX_SOL: case ICMPV6_MOBILE_PREFIX_ADV: break; default: /* informational */ if (type & ICMPV6_INFOMSG_MASK) break; net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n", saddr, daddr); /* * error of unknown type. 
* must pass to upper level */ reason = icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu); } /* until the v6 path can be better sorted assume failure and * preserve the status quo behaviour for the rest of the paths to here */ if (reason) kfree_skb_reason(skb, reason); else consume_skb(skb); return 0; csum_error: reason = SKB_DROP_REASON_ICMP_CSUM; __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_CSUMERRORS); discard_it: __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INERRORS); drop_no_count: kfree_skb_reason(skb, reason); return 0; } void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type, const struct in6_addr *saddr, const struct in6_addr *daddr, int oif) { memset(fl6, 0, sizeof(*fl6)); fl6->saddr = *saddr; fl6->daddr = *daddr; fl6->flowi6_proto = IPPROTO_ICMPV6; fl6->fl6_icmp_type = type; fl6->fl6_icmp_code = 0; fl6->flowi6_oif = oif; security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); } int __init icmpv6_init(void) { struct sock *sk; int err, i; for_each_possible_cpu(i) { err = inet_ctl_sock_create(&sk, PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &init_net); if (err < 0) { pr_err("Failed to initialize the ICMP6 control socket (err %d)\n", err); return err; } per_cpu(ipv6_icmp_sk, i) = sk; /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. */ sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024); } err = -EAGAIN; if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) goto fail; err = inet6_register_icmp_sender(icmp6_send); if (err) goto sender_reg_err; return 0; sender_reg_err: inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); fail: pr_err("Failed to register ICMP6 protocol\n"); return err; } void icmpv6_cleanup(void) { inet6_unregister_icmp_sender(icmp6_send); inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); } static const struct icmp6_err { int err; int fatal; } tab_unreach[] = { { /* NOROUTE */ .err = ENETUNREACH, .fatal = 0, }, { /* ADM_PROHIBITED */ .err = EACCES, .fatal = 1, }, { /* Was NOT_NEIGHBOUR, now reserved */ .err = EHOSTUNREACH, .fatal = 0, }, { /* ADDR_UNREACH */ .err = EHOSTUNREACH, .fatal = 0, }, { /* PORT_UNREACH */ .err = ECONNREFUSED, .fatal = 1, }, { /* POLICY_FAIL */ .err = EACCES, .fatal = 1, }, { /* REJECT_ROUTE */ .err = EACCES, .fatal = 1, }, }; int icmpv6_err_convert(u8 type, u8 code, int *err) { int fatal = 0; *err = EPROTO; switch (type) { case ICMPV6_DEST_UNREACH: fatal = 1; if (code < ARRAY_SIZE(tab_unreach)) { *err = tab_unreach[code].err; fatal = tab_unreach[code].fatal; } break; case ICMPV6_PKT_TOOBIG: *err = EMSGSIZE; break; case ICMPV6_PARAMPROB: *err = EPROTO; fatal = 1; break; case ICMPV6_TIME_EXCEED: *err = EHOSTUNREACH; break; } return fatal; } EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL static u32 icmpv6_errors_extension_mask_all = GENMASK_U8(ICMP_ERR_EXT_COUNT - 1, 0); static struct ctl_table ipv6_icmp_table_template[] = { { .procname = "ratelimit", .data = &init_net.ipv6.sysctl.icmpv6_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, { .procname = "echo_ignore_all", .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_all, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, }, { .procname = "echo_ignore_multicast", .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, }, { .procname = "echo_ignore_anycast", .data = &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, 
}, { .procname = "ratemask", .data = &init_net.ipv6.sysctl.icmpv6_ratemask_ptr, .maxlen = ICMPV6_MSG_MAX + 1, .mode = 0644, .proc_handler = proc_do_large_bitmap, }, { .procname = "error_anycast_as_unicast", .data = &init_net.ipv6.sysctl.icmpv6_error_anycast_as_unicast, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "errors_extension_mask", .data = &init_net.ipv6.sysctl.icmpv6_errors_extension_mask, .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &icmpv6_errors_extension_mask_all, }, }; struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net) { struct ctl_table *table; table = kmemdup(ipv6_icmp_table_template, sizeof(ipv6_icmp_table_template), GFP_KERNEL); if (table) { table[0].data = &net->ipv6.sysctl.icmpv6_time; table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all; table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast; table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast; table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr; table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast; table[6].data = &net->ipv6.sysctl.icmpv6_errors_extension_mask; } return table; } size_t ipv6_icmp_sysctl_table_size(void) { return ARRAY_SIZE(ipv6_icmp_table_template); } #endif
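The tab_unreach[] table above is what icmpv6_err_convert() consults for ICMPV6_DEST_UNREACH: the ICMPv6 code indexes the table to pick both the errno reported to the socket and whether the error is fatal. Below is a minimal, self-contained userspace sketch of that mapping; it is an illustrative re-implementation for clarity, not the kernel function itself, and the helper name dest_unreach_to_errno() is made up for the example.

#include <errno.h>
#include <stdio.h>

/* Illustrative copy of the kernel's unreachable-code table:
 * index = ICMPv6 dest-unreach code, value = (errno, fatal). */
static const struct { int err; int fatal; } tab_unreach[] = {
    { ENETUNREACH,  0 },    /* 0: no route */
    { EACCES,       1 },    /* 1: administratively prohibited */
    { EHOSTUNREACH, 0 },    /* 2: was not-neighbour, now reserved */
    { EHOSTUNREACH, 0 },    /* 3: address unreachable */
    { ECONNREFUSED, 1 },    /* 4: port unreachable */
    { EACCES,       1 },    /* 5: policy fail */
    { EACCES,       1 },    /* 6: reject route */
};

/* Same contract as icmpv6_err_convert() for the dest-unreach type:
 * returns non-zero if fatal and writes the errno through *err. */
static int dest_unreach_to_errno(unsigned char code, int *err)
{
    *err = EPROTO;
    if (code < sizeof(tab_unreach) / sizeof(tab_unreach[0])) {
        *err = tab_unreach[code].err;
        return tab_unreach[code].fatal;
    }
    return 1;   /* unknown code: keep it fatal, as the kernel does */
}

int main(void)
{
    for (unsigned char code = 0; code < 8; code++) {
        int err;
        int fatal = dest_unreach_to_errno(code, &err);
        printf("code %d -> errno %d (%s)\n", code, err,
               fatal ? "fatal" : "non-fatal");
    }
    return 0;
}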
873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 // SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) * Copyright Alan Cox GW4PTS (alan@lxorguk.ukuu.org.uk) * Copyright Tomi Manninen OH2BNS (oh2bns@sral.fi) */ #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/slab.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <net/arp.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/termios.h> /* For TIOCINQ/OUTQ */ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/init.h> #include <linux/spinlock.h> #include <net/netrom.h> #include <linux/seq_file.h> #include <linux/export.h> static unsigned int nr_neigh_no = 1; static HLIST_HEAD(nr_node_list); static DEFINE_SPINLOCK(nr_node_list_lock); static HLIST_HEAD(nr_neigh_list); static DEFINE_SPINLOCK(nr_neigh_list_lock); static struct nr_node *nr_node_get(ax25_address *callsign) { struct nr_node *found = NULL; struct nr_node *nr_node; spin_lock_bh(&nr_node_list_lock); nr_node_for_each(nr_node, &nr_node_list) if (ax25cmp(callsign, &nr_node->callsign) == 0) { nr_node_hold(nr_node); found = nr_node; break; } spin_unlock_bh(&nr_node_list_lock); return found; } static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign, struct net_device *dev) { struct nr_neigh *found = NULL; struct nr_neigh *nr_neigh; spin_lock_bh(&nr_neigh_list_lock); nr_neigh_for_each(nr_neigh, &nr_neigh_list) if (ax25cmp(callsign, &nr_neigh->callsign) == 0 && nr_neigh->dev == dev) { nr_neigh_hold(nr_neigh); found = nr_neigh; break; } spin_unlock_bh(&nr_neigh_list_lock); return found; } static void nr_remove_neigh(struct nr_neigh *); /* re-sort the routes in quality order. */ static void re_sort_routes(struct nr_node *nr_node, int x, int y) { if (nr_node->routes[y].quality > nr_node->routes[x].quality) { if (nr_node->which == x) nr_node->which = y; else if (nr_node->which == y) nr_node->which = x; swap(nr_node->routes[x], nr_node->routes[y]); } } /* * Add a new route to a node, and in the process add the node and the * neighbour if it is new. */ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, ax25_address *ax25, ax25_digi *ax25_digi, struct net_device *dev, int quality, int obs_count) { struct nr_node *nr_node; struct nr_neigh *nr_neigh; int i, found; struct net_device *odev; if ((odev=nr_dev_get(nr)) != NULL) { /* Can't add routes to ourself */ dev_put(odev); return -EINVAL; } nr_node = nr_node_get(nr); nr_neigh = nr_neigh_get_dev(ax25, dev); /* * The L2 link to a neighbour has failed in the past * and now a frame comes from this neighbour. We assume * it was a temporary trouble with the link and reset the * routes now (and not wait for a node broadcast). 
*/ if (nr_neigh != NULL && nr_neigh->failed != 0 && quality == 0) { struct nr_node *nr_nodet; spin_lock_bh(&nr_node_list_lock); nr_node_for_each(nr_nodet, &nr_node_list) { nr_node_lock(nr_nodet); for (i = 0; i < nr_nodet->count; i++) if (nr_nodet->routes[i].neighbour == nr_neigh) if (i < nr_nodet->which) nr_nodet->which = i; nr_node_unlock(nr_nodet); } spin_unlock_bh(&nr_node_list_lock); } if (nr_neigh != NULL) nr_neigh->failed = 0; if (quality == 0 && nr_neigh != NULL && nr_node != NULL) { nr_neigh_put(nr_neigh); nr_node_put(nr_node); return 0; } if (nr_neigh == NULL) { if ((nr_neigh = kmalloc(sizeof(*nr_neigh), GFP_ATOMIC)) == NULL) { if (nr_node) nr_node_put(nr_node); return -ENOMEM; } nr_neigh->callsign = *ax25; nr_neigh->digipeat = NULL; nr_neigh->ax25 = NULL; nr_neigh->dev = dev; nr_neigh->quality = READ_ONCE(sysctl_netrom_default_path_quality); nr_neigh->locked = 0; nr_neigh->count = 0; nr_neigh->number = nr_neigh_no++; nr_neigh->failed = 0; refcount_set(&nr_neigh->refcount, 1); if (ax25_digi != NULL && ax25_digi->ndigi > 0) { nr_neigh->digipeat = kmemdup(ax25_digi, sizeof(*ax25_digi), GFP_KERNEL); if (nr_neigh->digipeat == NULL) { kfree(nr_neigh); if (nr_node) nr_node_put(nr_node); return -ENOMEM; } } spin_lock_bh(&nr_neigh_list_lock); hlist_add_head(&nr_neigh->neigh_node, &nr_neigh_list); nr_neigh_hold(nr_neigh); spin_unlock_bh(&nr_neigh_list_lock); } if (quality != 0 && ax25cmp(nr, ax25) == 0 && !nr_neigh->locked) nr_neigh->quality = quality; if (nr_node == NULL) { if ((nr_node = kmalloc(sizeof(*nr_node), GFP_ATOMIC)) == NULL) { if (nr_neigh) nr_neigh_put(nr_neigh); return -ENOMEM; } nr_node->callsign = *nr; strscpy(nr_node->mnemonic, mnemonic); nr_node->which = 0; nr_node->count = 1; refcount_set(&nr_node->refcount, 1); spin_lock_init(&nr_node->node_lock); nr_node->routes[0].quality = quality; nr_node->routes[0].obs_count = obs_count; nr_node->routes[0].neighbour = nr_neigh; nr_neigh_hold(nr_neigh); nr_neigh->count++; spin_lock_bh(&nr_node_list_lock); hlist_add_head(&nr_node->node_node, &nr_node_list); /* refcount initialized at 1 */ spin_unlock_bh(&nr_node_list_lock); nr_neigh_put(nr_neigh); return 0; } nr_node_lock(nr_node); if (quality != 0) strscpy(nr_node->mnemonic, mnemonic); for (found = 0, i = 0; i < nr_node->count; i++) { if (nr_node->routes[i].neighbour == nr_neigh) { nr_node->routes[i].quality = quality; nr_node->routes[i].obs_count = obs_count; found = 1; break; } } if (!found) { /* We have space at the bottom, slot it in */ if (nr_node->count < 3) { nr_node->routes[2] = nr_node->routes[1]; nr_node->routes[1] = nr_node->routes[0]; nr_node->routes[0].quality = quality; nr_node->routes[0].obs_count = obs_count; nr_node->routes[0].neighbour = nr_neigh; nr_node->which++; nr_node->count++; nr_neigh_hold(nr_neigh); nr_neigh->count++; } else { /* It must be better than the worst */ if (quality > nr_node->routes[2].quality) { nr_node->routes[2].neighbour->count--; nr_neigh_put(nr_node->routes[2].neighbour); if (nr_node->routes[2].neighbour->count == 0 && !nr_node->routes[2].neighbour->locked) nr_remove_neigh(nr_node->routes[2].neighbour); nr_node->routes[2].quality = quality; nr_node->routes[2].obs_count = obs_count; nr_node->routes[2].neighbour = nr_neigh; nr_neigh_hold(nr_neigh); nr_neigh->count++; } } } /* Now re-sort the routes in quality order */ switch (nr_node->count) { case 3: re_sort_routes(nr_node, 0, 1); re_sort_routes(nr_node, 1, 2); fallthrough; case 2: re_sort_routes(nr_node, 0, 1); break; case 1: break; } for (i = 0; i < nr_node->count; i++) { if 
(nr_node->routes[i].neighbour == nr_neigh) { if (i < nr_node->which) nr_node->which = i; break; } } nr_neigh_put(nr_neigh); nr_node_unlock(nr_node); nr_node_put(nr_node); return 0; } static void nr_remove_node_locked(struct nr_node *nr_node) { lockdep_assert_held(&nr_node_list_lock); hlist_del_init(&nr_node->node_node); nr_node_put(nr_node); } static inline void __nr_remove_neigh(struct nr_neigh *nr_neigh) { hlist_del_init(&nr_neigh->neigh_node); nr_neigh_put(nr_neigh); } #define nr_remove_neigh_locked(__neigh) \ __nr_remove_neigh(__neigh) static void nr_remove_neigh(struct nr_neigh *nr_neigh) { spin_lock_bh(&nr_neigh_list_lock); __nr_remove_neigh(nr_neigh); spin_unlock_bh(&nr_neigh_list_lock); } /* * "Delete" a node. Strictly speaking remove a route to a node. The node * is only deleted if no routes are left to it. */ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct net_device *dev) { struct nr_node *nr_node; struct nr_neigh *nr_neigh; int i; nr_node = nr_node_get(callsign); if (nr_node == NULL) return -EINVAL; nr_neigh = nr_neigh_get_dev(neighbour, dev); if (nr_neigh == NULL) { nr_node_put(nr_node); return -EINVAL; } spin_lock_bh(&nr_node_list_lock); nr_node_lock(nr_node); for (i = 0; i < nr_node->count; i++) { if (nr_node->routes[i].neighbour == nr_neigh) { nr_neigh->count--; nr_neigh_put(nr_neigh); if (nr_neigh->count == 0 && !nr_neigh->locked) nr_remove_neigh(nr_neigh); nr_neigh_put(nr_neigh); nr_node->count--; if (nr_node->count == 0) { nr_remove_node_locked(nr_node); } else { switch (i) { case 0: nr_node->routes[0] = nr_node->routes[1]; fallthrough; case 1: nr_node->routes[1] = nr_node->routes[2]; fallthrough; case 2: break; } nr_node_put(nr_node); } nr_node_unlock(nr_node); spin_unlock_bh(&nr_node_list_lock); return 0; } } nr_neigh_put(nr_neigh); nr_node_unlock(nr_node); spin_unlock_bh(&nr_node_list_lock); nr_node_put(nr_node); return -EINVAL; } /* * Lock a neighbour with a quality. */ static int __must_check nr_add_neigh(ax25_address *callsign, ax25_digi *ax25_digi, struct net_device *dev, unsigned int quality) { struct nr_neigh *nr_neigh; nr_neigh = nr_neigh_get_dev(callsign, dev); if (nr_neigh) { nr_neigh->quality = quality; nr_neigh->locked = 1; nr_neigh_put(nr_neigh); return 0; } if ((nr_neigh = kmalloc(sizeof(*nr_neigh), GFP_ATOMIC)) == NULL) return -ENOMEM; nr_neigh->callsign = *callsign; nr_neigh->digipeat = NULL; nr_neigh->ax25 = NULL; nr_neigh->dev = dev; nr_neigh->quality = quality; nr_neigh->locked = 1; nr_neigh->count = 0; nr_neigh->number = nr_neigh_no++; nr_neigh->failed = 0; refcount_set(&nr_neigh->refcount, 1); if (ax25_digi != NULL && ax25_digi->ndigi > 0) { nr_neigh->digipeat = kmemdup(ax25_digi, sizeof(*ax25_digi), GFP_KERNEL); if (nr_neigh->digipeat == NULL) { kfree(nr_neigh); return -ENOMEM; } } spin_lock_bh(&nr_neigh_list_lock); hlist_add_head(&nr_neigh->neigh_node, &nr_neigh_list); /* refcount is initialized at 1 */ spin_unlock_bh(&nr_neigh_list_lock); return 0; } /* * "Delete" a neighbour. The neighbour is only removed if the number * of nodes that may use it is zero. */ static int nr_del_neigh(ax25_address *callsign, struct net_device *dev, unsigned int quality) { struct nr_neigh *nr_neigh; nr_neigh = nr_neigh_get_dev(callsign, dev); if (nr_neigh == NULL) return -EINVAL; nr_neigh->quality = quality; nr_neigh->locked = 0; if (nr_neigh->count == 0) nr_remove_neigh(nr_neigh); nr_neigh_put(nr_neigh); return 0; } /* * Decrement the obsolescence count by one. If a route is reduced to a * count of zero, remove it. 
Also remove any unlocked neighbours with * zero nodes routing via it. */ static int nr_dec_obs(void) { struct nr_neigh *nr_neigh; struct nr_node *s; struct hlist_node *nodet; int i; spin_lock_bh(&nr_node_list_lock); nr_node_for_each_safe(s, nodet, &nr_node_list) { nr_node_lock(s); for (i = 0; i < s->count; i++) { switch (s->routes[i].obs_count) { case 0: /* A locked entry */ break; case 1: /* From 1 -> 0 */ nr_neigh = s->routes[i].neighbour; nr_neigh->count--; nr_neigh_put(nr_neigh); if (nr_neigh->count == 0 && !nr_neigh->locked) nr_remove_neigh(nr_neigh); s->count--; switch (i) { case 0: s->routes[0] = s->routes[1]; fallthrough; case 1: s->routes[1] = s->routes[2]; break; case 2: break; } break; default: s->routes[i].obs_count--; break; } } if (s->count <= 0) nr_remove_node_locked(s); nr_node_unlock(s); } spin_unlock_bh(&nr_node_list_lock); return 0; } /* * A device has been removed. Remove its routes and neighbours. */ void nr_rt_device_down(struct net_device *dev) { struct nr_neigh *s; struct hlist_node *nodet, *node2t; struct nr_node *t; int i; spin_lock_bh(&nr_neigh_list_lock); nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) { if (s->dev == dev) { spin_lock_bh(&nr_node_list_lock); nr_node_for_each_safe(t, node2t, &nr_node_list) { nr_node_lock(t); for (i = 0; i < t->count; i++) { if (t->routes[i].neighbour == s) { t->count--; switch (i) { case 0: t->routes[0] = t->routes[1]; fallthrough; case 1: t->routes[1] = t->routes[2]; break; case 2: break; } } } if (t->count <= 0) nr_remove_node_locked(t); nr_node_unlock(t); } spin_unlock_bh(&nr_node_list_lock); nr_remove_neigh_locked(s); } } spin_unlock_bh(&nr_neigh_list_lock); } /* * Check that the device given is a valid AX.25 interface that is "up". * Or a valid ethernet interface with an AX.25 callsign binding. */ static struct net_device *nr_ax25_dev_get(char *devname) { struct net_device *dev; if ((dev = dev_get_by_name(&init_net, devname)) == NULL) return NULL; if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) return dev; dev_put(dev); return NULL; } /* * Find the first active NET/ROM device, usually "nr0". */ struct net_device *nr_dev_first(void) { struct net_device *dev, *first = NULL; rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; } dev_hold(first); rcu_read_unlock(); return first; } /* * Find the NET/ROM device for the given callsign. */ struct net_device *nr_dev_get(ax25_address *addr) { struct net_device *dev; rcu_read_lock(); for_each_netdev_rcu(&init_net, dev) { if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (const ax25_address *)dev->dev_addr) == 0) { dev_hold(dev); goto out; } } dev = NULL; out: rcu_read_unlock(); return dev; } static ax25_digi *nr_call_to_digi(ax25_digi *digi, int ndigis, ax25_address *digipeaters) { int i; if (ndigis == 0) return NULL; for (i = 0; i < ndigis; i++) { digi->calls[i] = digipeaters[i]; digi->repeated[i] = 0; } digi->ndigi = ndigis; digi->lastrepeat = -1; return digi; } /* * Handle the ioctls that control the routing functions. 
*/ int nr_rt_ioctl(unsigned int cmd, void __user *arg) { struct nr_route_struct nr_route; struct net_device *dev; ax25_digi digi; int ret; switch (cmd) { case SIOCADDRT: if (copy_from_user(&nr_route, arg, sizeof(struct nr_route_struct))) return -EFAULT; if (nr_route.ndigis > AX25_MAX_DIGIS) return -EINVAL; if ((dev = nr_ax25_dev_get(nr_route.device)) == NULL) return -EINVAL; switch (nr_route.type) { case NETROM_NODE: if (strnlen(nr_route.mnemonic, 7) == 7) { ret = -EINVAL; break; } ret = nr_add_node(&nr_route.callsign, nr_route.mnemonic, &nr_route.neighbour, nr_call_to_digi(&digi, nr_route.ndigis, nr_route.digipeaters), dev, nr_route.quality, nr_route.obs_count); break; case NETROM_NEIGH: ret = nr_add_neigh(&nr_route.callsign, nr_call_to_digi(&digi, nr_route.ndigis, nr_route.digipeaters), dev, nr_route.quality); break; default: ret = -EINVAL; } dev_put(dev); return ret; case SIOCDELRT: if (copy_from_user(&nr_route, arg, sizeof(struct nr_route_struct))) return -EFAULT; if ((dev = nr_ax25_dev_get(nr_route.device)) == NULL) return -EINVAL; switch (nr_route.type) { case NETROM_NODE: ret = nr_del_node(&nr_route.callsign, &nr_route.neighbour, dev); break; case NETROM_NEIGH: ret = nr_del_neigh(&nr_route.callsign, dev, nr_route.quality); break; default: ret = -EINVAL; } dev_put(dev); return ret; case SIOCNRDECOBS: return nr_dec_obs(); default: return -EINVAL; } return 0; } /* * A level 2 link has timed out, therefore it appears to be a poor link, * then don't use that neighbour until it is reset. */ void nr_link_failed(ax25_cb *ax25, int reason) { struct nr_neigh *s, *nr_neigh = NULL; struct nr_node *nr_node = NULL; spin_lock_bh(&nr_neigh_list_lock); nr_neigh_for_each(s, &nr_neigh_list) { if (s->ax25 == ax25) { nr_neigh_hold(s); nr_neigh = s; break; } } spin_unlock_bh(&nr_neigh_list_lock); if (nr_neigh == NULL) return; nr_neigh->ax25 = NULL; ax25_cb_put(ax25); if (++nr_neigh->failed < READ_ONCE(sysctl_netrom_link_fails_count)) { nr_neigh_put(nr_neigh); return; } spin_lock_bh(&nr_node_list_lock); nr_node_for_each(nr_node, &nr_node_list) { nr_node_lock(nr_node); if (nr_node->which < nr_node->count && nr_node->routes[nr_node->which].neighbour == nr_neigh) nr_node->which++; nr_node_unlock(nr_node); } spin_unlock_bh(&nr_node_list_lock); nr_neigh_put(nr_neigh); } /* * Route a frame to an appropriate AX.25 connection. A NULL ax25_cb * indicates an internally generated frame. */ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) { ax25_address *nr_src, *nr_dest; struct nr_neigh *nr_neigh; struct nr_node *nr_node; struct net_device *dev; unsigned char *dptr; ax25_cb *ax25s; int ret; struct sk_buff *skbn; /* * Reject malformed packets early. 
Check that it contains at least 2 * addresses and 1 byte more for Time-To-Live */ if (skb->len < 2 * sizeof(ax25_address) + 1) return 0; nr_src = (ax25_address *)(skb->data + 0); nr_dest = (ax25_address *)(skb->data + 7); if (ax25 != NULL) { ret = nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat, ax25->ax25_dev->dev, 0, READ_ONCE(sysctl_netrom_obsolescence_count_initialiser)); if (ret) return ret; } if ((dev = nr_dev_get(nr_dest)) != NULL) { /* Its for me */ if (ax25 == NULL) /* Its from me */ ret = nr_loopback_queue(skb); else ret = nr_rx_frame(skb, dev); dev_put(dev); return ret; } if (!READ_ONCE(sysctl_netrom_routing_control) && ax25 != NULL) return 0; /* Its Time-To-Live has expired */ if (skb->data[14] == 1) { return 0; } nr_node = nr_node_get(nr_dest); if (nr_node == NULL) return 0; nr_node_lock(nr_node); if (nr_node->which >= nr_node->count) { nr_node_unlock(nr_node); nr_node_put(nr_node); return 0; } nr_neigh = nr_node->routes[nr_node->which].neighbour; if ((dev = nr_dev_first()) == NULL) { nr_node_unlock(nr_node); nr_node_put(nr_node); return 0; } /* We are going to change the netrom headers so we should get our own skb, we also did not know until now how much header space we had to reserve... - RXQ */ if ((skbn=skb_copy_expand(skb, dev->hard_header_len, 0, GFP_ATOMIC)) == NULL) { nr_node_unlock(nr_node); nr_node_put(nr_node); dev_put(dev); return 0; } kfree_skb(skb); skb=skbn; skb->data[14]--; dptr = skb_push(skb, 1); *dptr = AX25_P_NETROM; ax25s = nr_neigh->ax25; nr_neigh->ax25 = ax25_send_frame(skb, 256, (const ax25_address *)dev->dev_addr, &nr_neigh->callsign, nr_neigh->digipeat, nr_neigh->dev); if (ax25s) ax25_cb_put(ax25s); dev_put(dev); ret = (nr_neigh->ax25 != NULL); nr_node_unlock(nr_node); nr_node_put(nr_node); return ret; } #ifdef CONFIG_PROC_FS static void *nr_node_start(struct seq_file *seq, loff_t *pos) __acquires(&nr_node_list_lock) { spin_lock_bh(&nr_node_list_lock); return seq_hlist_start_head(&nr_node_list, *pos); } static void *nr_node_next(struct seq_file *seq, void *v, loff_t *pos) { return seq_hlist_next(v, &nr_node_list, pos); } static void nr_node_stop(struct seq_file *seq, void *v) __releases(&nr_node_list_lock) { spin_unlock_bh(&nr_node_list_lock); } static int nr_node_show(struct seq_file *seq, void *v) { char buf[11]; int i; if (v == SEQ_START_TOKEN) seq_puts(seq, "callsign mnemonic w n qual obs neigh qual obs neigh qual obs neigh\n"); else { struct nr_node *nr_node = hlist_entry(v, struct nr_node, node_node); nr_node_lock(nr_node); seq_printf(seq, "%-9s %-7s %d %d", ax2asc(buf, &nr_node->callsign), (nr_node->mnemonic[0] == '\0') ? 
"*" : nr_node->mnemonic, nr_node->which + 1, nr_node->count); for (i = 0; i < nr_node->count; i++) { seq_printf(seq, " %3d %d %05d", nr_node->routes[i].quality, nr_node->routes[i].obs_count, nr_node->routes[i].neighbour->number); } nr_node_unlock(nr_node); seq_puts(seq, "\n"); } return 0; } const struct seq_operations nr_node_seqops = { .start = nr_node_start, .next = nr_node_next, .stop = nr_node_stop, .show = nr_node_show, }; static void *nr_neigh_start(struct seq_file *seq, loff_t *pos) __acquires(&nr_neigh_list_lock) { spin_lock_bh(&nr_neigh_list_lock); return seq_hlist_start_head(&nr_neigh_list, *pos); } static void *nr_neigh_next(struct seq_file *seq, void *v, loff_t *pos) { return seq_hlist_next(v, &nr_neigh_list, pos); } static void nr_neigh_stop(struct seq_file *seq, void *v) __releases(&nr_neigh_list_lock) { spin_unlock_bh(&nr_neigh_list_lock); } static int nr_neigh_show(struct seq_file *seq, void *v) { char buf[11]; int i; if (v == SEQ_START_TOKEN) seq_puts(seq, "addr callsign dev qual lock count failed digipeaters\n"); else { struct nr_neigh *nr_neigh; nr_neigh = hlist_entry(v, struct nr_neigh, neigh_node); seq_printf(seq, "%05d %-9s %-4s %3d %d %3d %3d", nr_neigh->number, ax2asc(buf, &nr_neigh->callsign), nr_neigh->dev ? nr_neigh->dev->name : "???", nr_neigh->quality, nr_neigh->locked, nr_neigh->count, nr_neigh->failed); if (nr_neigh->digipeat != NULL) { for (i = 0; i < nr_neigh->digipeat->ndigi; i++) seq_printf(seq, " %s", ax2asc(buf, &nr_neigh->digipeat->calls[i])); } seq_puts(seq, "\n"); } return 0; } const struct seq_operations nr_neigh_seqops = { .start = nr_neigh_start, .next = nr_neigh_next, .stop = nr_neigh_stop, .show = nr_neigh_show, }; #endif /* * Free all memory associated with the nodes and routes lists. */ void nr_rt_free(void) { struct nr_neigh *s = NULL; struct nr_node *t = NULL; struct hlist_node *nodet; spin_lock_bh(&nr_neigh_list_lock); spin_lock_bh(&nr_node_list_lock); nr_node_for_each_safe(t, nodet, &nr_node_list) { nr_node_lock(t); nr_remove_node_locked(t); nr_node_unlock(t); } nr_neigh_for_each_safe(s, nodet, &nr_neigh_list) { while(s->count) { s->count--; nr_neigh_put(s); } nr_remove_neigh_locked(s); } spin_unlock_bh(&nr_node_list_lock); spin_unlock_bh(&nr_neigh_list_lock); }
864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992 Linus Torvalds * * Added support for a Unix98-style ptmx device. * -- C. Scott Ananian <cananian@alumni.princeton.edu>, 14-Jan-1998 * */ #include <linux/module.h> #include <linux/errno.h> #include <linux/interrupt.h> #include <linux/tty.h> #include <linux/tty_flip.h> #include <linux/fcntl.h> #include <linux/sched/signal.h> #include <linux/string.h> #include <linux/major.h> #include <linux/mm.h> #include <linux/init.h> #include <linux/device.h> #include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/devpts_fs.h> #include <linux/slab.h> #include <linux/mutex.h> #include <linux/poll.h> #include <linux/mount.h> #include <linux/file.h> #include <linux/ioctl.h> #include <linux/compat.h> #include "tty.h" #undef TTY_DEBUG_HANGUP #ifdef TTY_DEBUG_HANGUP # define tty_debug_hangup(tty, f, args...) tty_debug(tty, f, ##args) #else # define tty_debug_hangup(tty, f, args...) do {} while (0) #endif #ifdef CONFIG_UNIX98_PTYS static struct tty_driver *ptm_driver; static struct tty_driver *pts_driver; static DEFINE_MUTEX(devpts_mutex); #endif static void pty_close(struct tty_struct *tty, struct file *filp) { if (tty->driver->subtype == PTY_TYPE_MASTER) WARN_ON(tty->count > 1); else { if (tty_io_error(tty)) return; if (tty->count > 2) return; } set_bit(TTY_IO_ERROR, &tty->flags); wake_up_interruptible(&tty->read_wait); wake_up_interruptible(&tty->write_wait); spin_lock_irq(&tty->ctrl.lock); tty->ctrl.packet = false; spin_unlock_irq(&tty->ctrl.lock); /* Review - krefs on tty_link ?? */ if (!tty->link) return; set_bit(TTY_OTHER_CLOSED, &tty->link->flags); wake_up_interruptible(&tty->link->read_wait); wake_up_interruptible(&tty->link->write_wait); if (tty->driver->subtype == PTY_TYPE_MASTER) { set_bit(TTY_OTHER_CLOSED, &tty->flags); #ifdef CONFIG_UNIX98_PTYS if (tty->driver == ptm_driver) { mutex_lock(&devpts_mutex); if (tty->link->driver_data) devpts_pty_kill(tty->link->driver_data); mutex_unlock(&devpts_mutex); } #endif tty_vhangup(tty->link); } } /* * The unthrottle routine is called by the line discipline to signal * that it can receive more characters. For PTY's, the TTY_THROTTLED * flag is always set, to force the line discipline to always call the * unthrottle routine when there are fewer than TTY_THRESHOLD_UNTHROTTLE * characters in the queue. This is necessary since each time this * happens, we need to wake up any sleeping processes that could be * (1) trying to send data to the pty, or (2) waiting in wait_until_sent() * for the pty buffer to be drained. */ static void pty_unthrottle(struct tty_struct *tty) { tty_wakeup(tty->link); set_bit(TTY_THROTTLED, &tty->flags); } /** * pty_write - write to a pty * @tty: the tty we write from * @buf: kernel buffer of data * @c: bytes to write * * Our "hardware" write method. Data is coming from the ldisc which * may be in a non sleeping state. We simply throw this at the other * end of the link as if we were an IRQ handler receiving stuff for * the other side of the pty/tty pair. 
*/ static ssize_t pty_write(struct tty_struct *tty, const u8 *buf, size_t c) { struct tty_struct *to = tty->link; if (tty->flow.stopped || !c) return 0; return tty_insert_flip_string_and_push_buffer(to->port, buf, c); } /** * pty_write_room - write space * @tty: tty we are writing from * * Report how many bytes the ldisc can send into the queue for * the other device. */ static unsigned int pty_write_room(struct tty_struct *tty) { if (tty->flow.stopped) return 0; return tty_buffer_space_avail(tty->link->port); } /* Set the lock flag on a pty */ static int pty_set_lock(struct tty_struct *tty, int __user *arg) { int val; if (get_user(val, arg)) return -EFAULT; if (val) set_bit(TTY_PTY_LOCK, &tty->flags); else clear_bit(TTY_PTY_LOCK, &tty->flags); return 0; } static int pty_get_lock(struct tty_struct *tty, int __user *arg) { int locked = test_bit(TTY_PTY_LOCK, &tty->flags); return put_user(locked, arg); } /* Set the packet mode on a pty */ static int pty_set_pktmode(struct tty_struct *tty, int __user *arg) { int pktmode; if (get_user(pktmode, arg)) return -EFAULT; spin_lock_irq(&tty->ctrl.lock); if (pktmode) { if (!tty->ctrl.packet) { tty->link->ctrl.pktstatus = 0; smp_mb(); tty->ctrl.packet = true; } } else tty->ctrl.packet = false; spin_unlock_irq(&tty->ctrl.lock); return 0; } /* Get the packet mode of a pty */ static int pty_get_pktmode(struct tty_struct *tty, int __user *arg) { int pktmode = tty->ctrl.packet; return put_user(pktmode, arg); } /* Send a signal to the slave */ static int pty_signal(struct tty_struct *tty, int sig) { struct pid *pgrp; if (sig != SIGINT && sig != SIGQUIT && sig != SIGTSTP) return -EINVAL; if (tty->link) { pgrp = tty_get_pgrp(tty->link); if (pgrp) kill_pgrp(pgrp, sig, 1); put_pid(pgrp); } return 0; } static void pty_flush_buffer(struct tty_struct *tty) { struct tty_struct *to = tty->link; if (!to) return; tty_buffer_flush(to, NULL); if (to->ctrl.packet) { spin_lock_irq(&tty->ctrl.lock); tty->ctrl.pktstatus |= TIOCPKT_FLUSHWRITE; wake_up_interruptible(&to->read_wait); spin_unlock_irq(&tty->ctrl.lock); } } static int pty_open(struct tty_struct *tty, struct file *filp) { if (!tty || !tty->link) return -ENODEV; if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) goto out; if (test_bit(TTY_PTY_LOCK, &tty->link->flags)) goto out; if (tty->driver->subtype == PTY_TYPE_SLAVE && tty->link->count != 1) goto out; clear_bit(TTY_IO_ERROR, &tty->flags); clear_bit(TTY_OTHER_CLOSED, &tty->link->flags); set_bit(TTY_THROTTLED, &tty->flags); return 0; out: set_bit(TTY_IO_ERROR, &tty->flags); return -EIO; } static void pty_set_termios(struct tty_struct *tty, const struct ktermios *old_termios) { /* See if packet mode change of state. 
*/ if (tty->link && tty->link->ctrl.packet) { int extproc = (old_termios->c_lflag & EXTPROC) | L_EXTPROC(tty); int old_flow = ((old_termios->c_iflag & IXON) && (old_termios->c_cc[VSTOP] == '\023') && (old_termios->c_cc[VSTART] == '\021')); int new_flow = (I_IXON(tty) && STOP_CHAR(tty) == '\023' && START_CHAR(tty) == '\021'); if ((old_flow != new_flow) || extproc) { spin_lock_irq(&tty->ctrl.lock); if (old_flow != new_flow) { tty->ctrl.pktstatus &= ~(TIOCPKT_DOSTOP | TIOCPKT_NOSTOP); if (new_flow) tty->ctrl.pktstatus |= TIOCPKT_DOSTOP; else tty->ctrl.pktstatus |= TIOCPKT_NOSTOP; } if (extproc) tty->ctrl.pktstatus |= TIOCPKT_IOCTL; spin_unlock_irq(&tty->ctrl.lock); wake_up_interruptible(&tty->link->read_wait); } } tty->termios.c_cflag &= ~(CSIZE | PARENB); tty->termios.c_cflag |= (CS8 | CREAD); } /** * pty_resize - resize event * @tty: tty being resized * @ws: window size being set. * * Update the termios variables and send the necessary signals to * peform a terminal resize correctly */ static int pty_resize(struct tty_struct *tty, struct winsize *ws) { struct pid *pgrp, *rpgrp; struct tty_struct *pty = tty->link; /* For a PTY we need to lock the tty side */ mutex_lock(&tty->winsize_mutex); if (!memcmp(ws, &tty->winsize, sizeof(*ws))) goto done; /* Signal the foreground process group of both ptys */ pgrp = tty_get_pgrp(tty); rpgrp = tty_get_pgrp(pty); if (pgrp) kill_pgrp(pgrp, SIGWINCH, 1); if (rpgrp != pgrp && rpgrp) kill_pgrp(rpgrp, SIGWINCH, 1); put_pid(pgrp); put_pid(rpgrp); tty->winsize = *ws; pty->winsize = *ws; /* Never used so will go away soon */ done: mutex_unlock(&tty->winsize_mutex); return 0; } /** * pty_start - start() handler * pty_stop - stop() handler * @tty: tty being flow-controlled * * Propagates the TIOCPKT status to the master pty. * * NB: only the master pty can be in packet mode so only the slave * needs start()/stop() handlers */ static void pty_start(struct tty_struct *tty) { unsigned long flags; if (tty->link && tty->link->ctrl.packet) { spin_lock_irqsave(&tty->ctrl.lock, flags); tty->ctrl.pktstatus &= ~TIOCPKT_STOP; tty->ctrl.pktstatus |= TIOCPKT_START; spin_unlock_irqrestore(&tty->ctrl.lock, flags); wake_up_interruptible_poll(&tty->link->read_wait, EPOLLIN); } } static void pty_stop(struct tty_struct *tty) { unsigned long flags; if (tty->link && tty->link->ctrl.packet) { spin_lock_irqsave(&tty->ctrl.lock, flags); tty->ctrl.pktstatus &= ~TIOCPKT_START; tty->ctrl.pktstatus |= TIOCPKT_STOP; spin_unlock_irqrestore(&tty->ctrl.lock, flags); wake_up_interruptible_poll(&tty->link->read_wait, EPOLLIN); } } /** * pty_common_install - set up the pty pair * @driver: the pty driver * @tty: the tty being instantiated * @legacy: true if this is BSD style * * Perform the initial set up for the tty/pty pair. Called from the * tty layer when the port is first opened. 
* * Locking: the caller must hold the tty_mutex */ static int pty_common_install(struct tty_driver *driver, struct tty_struct *tty, bool legacy) { struct tty_struct *o_tty; struct tty_port *ports[2]; int idx = tty->index; int retval = -ENOMEM; /* Opening the slave first has always returned -EIO */ if (driver->subtype != PTY_TYPE_MASTER) return -EIO; ports[0] = kmalloc(sizeof **ports, GFP_KERNEL); ports[1] = kmalloc(sizeof **ports, GFP_KERNEL); if (!ports[0] || !ports[1]) goto err; if (!try_module_get(driver->other->owner)) { /* This cannot in fact currently happen */ goto err; } o_tty = alloc_tty_struct(driver->other, idx); if (!o_tty) goto err_put_module; tty_set_lock_subclass(o_tty); lockdep_set_subclass(&o_tty->termios_rwsem, TTY_LOCK_SLAVE); if (legacy) { /* We always use new tty termios data so we can do this the easy way .. */ tty_init_termios(tty); tty_init_termios(o_tty); driver->other->ttys[idx] = o_tty; driver->ttys[idx] = tty; } else { memset(&tty->termios_locked, 0, sizeof(tty->termios_locked)); tty->termios = driver->init_termios; memset(&o_tty->termios_locked, 0, sizeof(tty->termios_locked)); o_tty->termios = driver->other->init_termios; } /* * Everything allocated ... set up the o_tty structure. */ tty_driver_kref_get(driver->other); /* Establish the links in both directions */ tty->link = o_tty; o_tty->link = tty; tty_port_init(ports[0]); tty_port_init(ports[1]); tty_buffer_set_limit(ports[0], 8192); tty_buffer_set_limit(ports[1], 8192); o_tty->port = ports[0]; tty->port = ports[1]; o_tty->port->itty = o_tty; tty_buffer_set_lock_subclass(o_tty->port); tty_driver_kref_get(driver); tty->count++; o_tty->count++; return 0; err_put_module: module_put(driver->other->owner); err: kfree(ports[0]); kfree(ports[1]); return retval; } static void pty_cleanup(struct tty_struct *tty) { tty_port_put(tty->port); } /* Traditional BSD devices */ #ifdef CONFIG_LEGACY_PTYS static int pty_install(struct tty_driver *driver, struct tty_struct *tty) { return pty_common_install(driver, tty, true); } static void pty_remove(struct tty_driver *driver, struct tty_struct *tty) { struct tty_struct *pair = tty->link; driver->ttys[tty->index] = NULL; if (pair) pair->driver->ttys[pair->index] = NULL; } static int pty_bsd_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { switch (cmd) { case TIOCSPTLCK: /* Set PT Lock (disallow slave open) */ return pty_set_lock(tty, (int __user *) arg); case TIOCGPTLCK: /* Get PT Lock status */ return pty_get_lock(tty, (int __user *)arg); case TIOCPKT: /* Set PT packet mode */ return pty_set_pktmode(tty, (int __user *)arg); case TIOCGPKT: /* Get PT packet mode */ return pty_get_pktmode(tty, (int __user *)arg); case TIOCSIG: /* Send signal to other side of pty */ return pty_signal(tty, (int) arg); case TIOCGPTN: /* TTY returns ENOTTY, but glibc expects EINVAL here */ return -EINVAL; } return -ENOIOCTLCMD; } #ifdef CONFIG_COMPAT static long pty_bsd_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { /* * PTY ioctls don't require any special translation between 32-bit and * 64-bit userspace, they are already compatible. */ return pty_bsd_ioctl(tty, cmd, (unsigned long)compat_ptr(arg)); } #else #define pty_bsd_compat_ioctl NULL #endif static int legacy_count = CONFIG_LEGACY_PTY_COUNT; /* * not really modular, but the easiest way to keep compat with existing * bootargs behaviour is to continue using module_param here. */ module_param(legacy_count, int, 0); /* * The master side of a pty can do TIOCSPTLCK and thus * has pty_bsd_ioctl. 
*/ static const struct tty_operations master_pty_ops_bsd = { .install = pty_install, .open = pty_open, .close = pty_close, .write = pty_write, .write_room = pty_write_room, .flush_buffer = pty_flush_buffer, .unthrottle = pty_unthrottle, .ioctl = pty_bsd_ioctl, .compat_ioctl = pty_bsd_compat_ioctl, .cleanup = pty_cleanup, .resize = pty_resize, .remove = pty_remove }; static const struct tty_operations slave_pty_ops_bsd = { .install = pty_install, .open = pty_open, .close = pty_close, .write = pty_write, .write_room = pty_write_room, .flush_buffer = pty_flush_buffer, .unthrottle = pty_unthrottle, .set_termios = pty_set_termios, .cleanup = pty_cleanup, .resize = pty_resize, .start = pty_start, .stop = pty_stop, .remove = pty_remove }; static void __init legacy_pty_init(void) { struct tty_driver *pty_driver, *pty_slave_driver; if (legacy_count <= 0) return; pty_driver = tty_alloc_driver(legacy_count, TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_ALLOC); if (IS_ERR(pty_driver)) panic("Couldn't allocate pty driver"); pty_slave_driver = tty_alloc_driver(legacy_count, TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_ALLOC); if (IS_ERR(pty_slave_driver)) panic("Couldn't allocate pty slave driver"); pty_driver->driver_name = "pty_master"; pty_driver->name = "pty"; pty_driver->major = PTY_MASTER_MAJOR; pty_driver->minor_start = 0; pty_driver->type = TTY_DRIVER_TYPE_PTY; pty_driver->subtype = PTY_TYPE_MASTER; pty_driver->init_termios = tty_std_termios; pty_driver->init_termios.c_iflag = 0; pty_driver->init_termios.c_oflag = 0; pty_driver->init_termios.c_cflag = B38400 | CS8 | CREAD; pty_driver->init_termios.c_lflag = 0; pty_driver->init_termios.c_ispeed = 38400; pty_driver->init_termios.c_ospeed = 38400; pty_driver->other = pty_slave_driver; tty_set_operations(pty_driver, &master_pty_ops_bsd); pty_slave_driver->driver_name = "pty_slave"; pty_slave_driver->name = "ttyp"; pty_slave_driver->major = PTY_SLAVE_MAJOR; pty_slave_driver->minor_start = 0; pty_slave_driver->type = TTY_DRIVER_TYPE_PTY; pty_slave_driver->subtype = PTY_TYPE_SLAVE; pty_slave_driver->init_termios = tty_std_termios; pty_slave_driver->init_termios.c_cflag = B38400 | CS8 | CREAD; pty_slave_driver->init_termios.c_ispeed = 38400; pty_slave_driver->init_termios.c_ospeed = 38400; pty_slave_driver->other = pty_driver; tty_set_operations(pty_slave_driver, &slave_pty_ops_bsd); if (tty_register_driver(pty_driver)) panic("Couldn't register pty driver"); if (tty_register_driver(pty_slave_driver)) panic("Couldn't register pty slave driver"); } #else static inline void legacy_pty_init(void) { } #endif /* Unix98 devices */ #ifdef CONFIG_UNIX98_PTYS static struct cdev ptmx_cdev; /** * ptm_open_peer - open the peer of a pty * @master: the open struct file of the ptmx device node * @tty: the master of the pty being opened * @flags: the flags for open * * Provide a race free way for userspace to open the slave end of a pty * (where they have the master fd and cannot access or trust the mount * namespace /dev/pts was mounted inside). 
*/ int ptm_open_peer(struct file *master, struct tty_struct *tty, int flags) { int fd; struct file *filp; int retval = -EINVAL; struct path path; if (tty->driver != ptm_driver) return -EIO; fd = get_unused_fd_flags(flags); if (fd < 0) { retval = fd; goto err; } /* Compute the slave's path */ path.mnt = devpts_mntget(master, tty->driver_data); if (IS_ERR(path.mnt)) { retval = PTR_ERR(path.mnt); goto err_put; } path.dentry = tty->link->driver_data; filp = dentry_open(&path, flags, current_cred()); mntput(path.mnt); if (IS_ERR(filp)) { retval = PTR_ERR(filp); goto err_put; } fd_install(fd, filp); return fd; err_put: put_unused_fd(fd); err: return retval; } static int pty_unix98_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { switch (cmd) { case TIOCSPTLCK: /* Set PT Lock (disallow slave open) */ return pty_set_lock(tty, (int __user *)arg); case TIOCGPTLCK: /* Get PT Lock status */ return pty_get_lock(tty, (int __user *)arg); case TIOCPKT: /* Set PT packet mode */ return pty_set_pktmode(tty, (int __user *)arg); case TIOCGPKT: /* Get PT packet mode */ return pty_get_pktmode(tty, (int __user *)arg); case TIOCGPTN: /* Get PT Number */ return put_user(tty->index, (unsigned int __user *)arg); case TIOCSIG: /* Send signal to other side of pty */ return pty_signal(tty, (int) arg); } return -ENOIOCTLCMD; } #ifdef CONFIG_COMPAT static long pty_unix98_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { /* * PTY ioctls don't require any special translation between 32-bit and * 64-bit userspace, they are already compatible. */ return pty_unix98_ioctl(tty, cmd, cmd == TIOCSIG ? arg : (unsigned long)compat_ptr(arg)); } #else #define pty_unix98_compat_ioctl NULL #endif /** * ptm_unix98_lookup - find a pty master * @driver: ptm driver * @file: unused * @idx: tty index * * Look up a pty master device. Called under the tty_mutex for now. * This provides our locking. */ static struct tty_struct *ptm_unix98_lookup(struct tty_driver *driver, struct file *file, int idx) { /* Master must be open via /dev/ptmx */ return ERR_PTR(-EIO); } /** * pts_unix98_lookup - find a pty slave * @driver: pts driver * @file: file pointer to tty * @idx: tty index * * Look up a pty master device. Called under the tty_mutex for now. * This provides our locking for the tty pointer. 
*/ static struct tty_struct *pts_unix98_lookup(struct tty_driver *driver, struct file *file, int idx) { struct tty_struct *tty; mutex_lock(&devpts_mutex); tty = devpts_get_priv(file->f_path.dentry); mutex_unlock(&devpts_mutex); /* Master must be open before slave */ if (!tty) return ERR_PTR(-EIO); return tty; } static int pty_unix98_install(struct tty_driver *driver, struct tty_struct *tty) { return pty_common_install(driver, tty, false); } /* this is called once with whichever end is closed last */ static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty) { struct pts_fs_info *fsi; if (tty->driver->subtype == PTY_TYPE_MASTER) fsi = tty->driver_data; else fsi = tty->link->driver_data; if (fsi) { devpts_kill_index(fsi, tty->index); devpts_release(fsi); } } static void pty_show_fdinfo(struct tty_struct *tty, struct seq_file *m) { seq_printf(m, "tty-index:\t%d\n", tty->index); } static const struct tty_operations ptm_unix98_ops = { .lookup = ptm_unix98_lookup, .install = pty_unix98_install, .remove = pty_unix98_remove, .open = pty_open, .close = pty_close, .write = pty_write, .write_room = pty_write_room, .flush_buffer = pty_flush_buffer, .unthrottle = pty_unthrottle, .ioctl = pty_unix98_ioctl, .compat_ioctl = pty_unix98_compat_ioctl, .resize = pty_resize, .cleanup = pty_cleanup, .show_fdinfo = pty_show_fdinfo, }; static const struct tty_operations pty_unix98_ops = { .lookup = pts_unix98_lookup, .install = pty_unix98_install, .remove = pty_unix98_remove, .open = pty_open, .close = pty_close, .write = pty_write, .write_room = pty_write_room, .flush_buffer = pty_flush_buffer, .unthrottle = pty_unthrottle, .set_termios = pty_set_termios, .start = pty_start, .stop = pty_stop, .cleanup = pty_cleanup, }; /** * ptmx_open - open a unix 98 pty master * @inode: inode of device file * @filp: file pointer to tty * * Allocate a unix98 pty master device from the ptmx driver. * * Locking: tty_mutex protects the init_dev work. tty->count should * protect the rest. * allocated_ptys_lock handles the list of free pty numbers */ static int ptmx_open(struct inode *inode, struct file *filp) { struct pts_fs_info *fsi; struct tty_struct *tty; struct dentry *dentry; int retval; int index; nonseekable_open(inode, filp); /* We refuse fsnotify events on ptmx, since it's a shared resource */ file_set_fsnotify_mode(filp, FMODE_NONOTIFY); retval = tty_alloc_file(filp); if (retval) return retval; fsi = devpts_acquire(filp); if (IS_ERR(fsi)) { retval = PTR_ERR(fsi); goto out_free_file; } /* find a device that is not in use. 
*/ mutex_lock(&devpts_mutex); index = devpts_new_index(fsi); mutex_unlock(&devpts_mutex); retval = index; if (index < 0) goto out_put_fsi; mutex_lock(&tty_mutex); tty = tty_init_dev(ptm_driver, index); /* The tty returned here is locked so we can safely drop the mutex */ mutex_unlock(&tty_mutex); retval = PTR_ERR(tty); if (IS_ERR(tty)) goto out; /* * From here on out, the tty is "live", and the index and * fsi will be killed/put by the tty_release() */ set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ tty->driver_data = fsi; tty_add_file(tty, filp); dentry = devpts_pty_new(fsi, index, tty->link); if (IS_ERR(dentry)) { retval = PTR_ERR(dentry); goto err_release; } tty->link->driver_data = dentry; retval = ptm_driver->ops->open(tty, filp); if (retval) goto err_release; tty_debug_hangup(tty, "opening (count=%d)\n", tty->count); tty_unlock(tty); return 0; err_release: tty_unlock(tty); // This will also put-ref the fsi tty_release(inode, filp); return retval; out: devpts_kill_index(fsi, index); out_put_fsi: devpts_release(fsi); out_free_file: tty_free_file(filp); return retval; } static struct file_operations ptmx_fops __ro_after_init; static void __init unix98_pty_init(void) { ptm_driver = tty_alloc_driver(NR_UNIX98_PTY_MAX, TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV | TTY_DRIVER_DEVPTS_MEM | TTY_DRIVER_DYNAMIC_ALLOC); if (IS_ERR(ptm_driver)) panic("Couldn't allocate Unix98 ptm driver"); pts_driver = tty_alloc_driver(NR_UNIX98_PTY_MAX, TTY_DRIVER_RESET_TERMIOS | TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV | TTY_DRIVER_DEVPTS_MEM | TTY_DRIVER_DYNAMIC_ALLOC); if (IS_ERR(pts_driver)) panic("Couldn't allocate Unix98 pts driver"); ptm_driver->driver_name = "pty_master"; ptm_driver->name = "ptm"; ptm_driver->major = UNIX98_PTY_MASTER_MAJOR; ptm_driver->minor_start = 0; ptm_driver->type = TTY_DRIVER_TYPE_PTY; ptm_driver->subtype = PTY_TYPE_MASTER; ptm_driver->init_termios = tty_std_termios; ptm_driver->init_termios.c_iflag = 0; ptm_driver->init_termios.c_oflag = 0; ptm_driver->init_termios.c_cflag = B38400 | CS8 | CREAD; ptm_driver->init_termios.c_lflag = 0; ptm_driver->init_termios.c_ispeed = 38400; ptm_driver->init_termios.c_ospeed = 38400; ptm_driver->other = pts_driver; tty_set_operations(ptm_driver, &ptm_unix98_ops); pts_driver->driver_name = "pty_slave"; pts_driver->name = "pts"; pts_driver->major = UNIX98_PTY_SLAVE_MAJOR; pts_driver->minor_start = 0; pts_driver->type = TTY_DRIVER_TYPE_PTY; pts_driver->subtype = PTY_TYPE_SLAVE; pts_driver->init_termios = tty_std_termios; pts_driver->init_termios.c_cflag = B38400 | CS8 | CREAD; pts_driver->init_termios.c_ispeed = 38400; pts_driver->init_termios.c_ospeed = 38400; pts_driver->other = ptm_driver; tty_set_operations(pts_driver, &pty_unix98_ops); if (tty_register_driver(ptm_driver)) panic("Couldn't register Unix98 ptm driver"); if (tty_register_driver(pts_driver)) panic("Couldn't register Unix98 pts driver"); /* Now create the /dev/ptmx special device */ tty_default_fops(&ptmx_fops); ptmx_fops.open = ptmx_open; cdev_init(&ptmx_cdev, &ptmx_fops); if (cdev_add(&ptmx_cdev, MKDEV(TTYAUX_MAJOR, 2), 1) || register_chrdev_region(MKDEV(TTYAUX_MAJOR, 2), 1, "/dev/ptmx") < 0) panic("Couldn't register /dev/ptmx driver"); device_create(&tty_class, NULL, MKDEV(TTYAUX_MAJOR, 2), NULL, "ptmx"); } #else static inline void unix98_pty_init(void) { } #endif static int __init pty_init(void) { legacy_pty_init(); unix98_pty_init(); return 0; } device_initcall(pty_init);
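For reference, the Unix98 path above is driven from userspace by opening /dev/ptmx (which lands in ptmx_open()), clearing the pty lock with TIOCSPTLCK and querying the slave index with TIOCGPTN (both dispatched through pty_unix98_ioctl()), then opening /dev/pts/N. A minimal sketch of that sequence follows; it assumes a Linux system where <sys/ioctl.h> exposes these ioctl numbers, and error handling is deliberately terse.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
    int unlock = 0, ptn;
    char slave_path[32];

    /* Master side: each open of /dev/ptmx allocates a new pty pair. */
    int master = open("/dev/ptmx", O_RDWR | O_NOCTTY);
    if (master < 0) { perror("open /dev/ptmx"); return 1; }

    /* TIOCSPTLCK with 0 clears TTY_PTY_LOCK so the slave may be opened. */
    if (ioctl(master, TIOCSPTLCK, &unlock) < 0) { perror("TIOCSPTLCK"); return 1; }

    /* TIOCGPTN returns the pty index, i.e. the N in /dev/pts/N. */
    if (ioctl(master, TIOCGPTN, &ptn) < 0) { perror("TIOCGPTN"); return 1; }
    snprintf(slave_path, sizeof(slave_path), "/dev/pts/%d", ptn);

    int slave = open(slave_path, O_RDWR | O_NOCTTY);
    if (slave < 0) { perror("open slave"); return 1; }

    /* Data written to one end shows up on the other (pty_write()). */
    if (write(master, "hello\n", 6) < 0) { perror("write"); return 1; }
    char buf[16];
    ssize_t n = read(slave, buf, sizeof(buf));
    if (n > 0)
        fwrite(buf, 1, (size_t)n, stdout);

    close(slave);
    close(master);
    return 0;
}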
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2007, 2008 Karsten Wiese <fzu@wemgehoertderstaat.de> */ #include <linux/slab.h> #include <linux/usb.h> #include <linux/usb/audio.h> #include <linux/module.h> #include <sound/core.h> #include <sound/hwdep.h> #include <sound/pcm.h> #include <sound/initval.h> #define MODNAME "US122L" #include "usb_stream.c" #include "../usbaudio.h" #include "../midi.h" #include "us122l.h" MODULE_AUTHOR("Karsten Wiese <fzu@wemgehoertderstaat.de>"); MODULE_DESCRIPTION("TASCAM "NAME_ALLCAPS" Version 0.5"); MODULE_LICENSE("GPL"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-max */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* Id for this card */ /* Enable this card */ static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; module_param_array(index, int, NULL, 0444); MODULE_PARM_DESC(index, "Index value for 
"NAME_ALLCAPS"."); module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for "NAME_ALLCAPS"."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable "NAME_ALLCAPS"."); /* driver_info flags */ #define US122L_FLAG_US144 BIT(0) static int snd_us122l_card_used[SNDRV_CARDS]; static int us122l_create_usbmidi(struct snd_card *card) { static const struct snd_usb_midi_endpoint_info quirk_data = { .out_ep = 4, .in_ep = 3, .out_cables = 0x001, .in_cables = 0x001 }; static const struct snd_usb_audio_quirk quirk = { .vendor_name = "US122L", .product_name = NAME_ALLCAPS, .ifnum = 1, .type = QUIRK_MIDI_US122L, .data = &quirk_data }; struct usb_device *dev = US122L(card)->dev; struct usb_interface *iface = usb_ifnum_to_if(dev, 1); return snd_usbmidi_create(card, iface, &US122L(card)->midi_list, &quirk); } static int us144_create_usbmidi(struct snd_card *card) { static const struct snd_usb_midi_endpoint_info quirk_data = { .out_ep = 4, .in_ep = 3, .out_cables = 0x001, .in_cables = 0x001 }; static const struct snd_usb_audio_quirk quirk = { .vendor_name = "US144", .product_name = NAME_ALLCAPS, .ifnum = 0, .type = QUIRK_MIDI_US122L, .data = &quirk_data }; struct usb_device *dev = US122L(card)->dev; struct usb_interface *iface = usb_ifnum_to_if(dev, 0); return snd_usbmidi_create(card, iface, &US122L(card)->midi_list, &quirk); } static void pt_info_set(struct usb_device *dev, u8 v) { usb_control_msg_send(dev, 0, 'I', USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, v, 0, NULL, 0, 1000, GFP_NOIO); } static vm_fault_t usb_stream_hwdep_vm_fault(struct vm_fault *vmf) { unsigned long offset; struct page *page; void *vaddr; struct us122l *us122l = vmf->vma->vm_private_data; struct usb_stream *s; guard(mutex)(&us122l->mutex); s = us122l->sk.s; if (!s) return VM_FAULT_SIGBUS; offset = vmf->pgoff << PAGE_SHIFT; if (offset < PAGE_ALIGN(s->read_size)) { vaddr = (char *)s + offset; } else { offset -= PAGE_ALIGN(s->read_size); if (offset >= PAGE_ALIGN(s->write_size)) return VM_FAULT_SIGBUS; vaddr = us122l->sk.write_page + offset; } page = virt_to_page(vaddr); get_page(page); vmf->page = page; return 0; } static const struct vm_operations_struct usb_stream_hwdep_vm_ops = { .fault = usb_stream_hwdep_vm_fault, }; static int usb_stream_hwdep_open(struct snd_hwdep *hw, struct file *file) { struct us122l *us122l = hw->private_data; struct usb_interface *iface; if (hw->used >= 2) return -EBUSY; if (!us122l->first) us122l->first = file; if (us122l->is_us144) { iface = usb_ifnum_to_if(us122l->dev, 0); usb_autopm_get_interface(iface); } iface = usb_ifnum_to_if(us122l->dev, 1); usb_autopm_get_interface(iface); return 0; } static int usb_stream_hwdep_release(struct snd_hwdep *hw, struct file *file) { struct us122l *us122l = hw->private_data; struct usb_interface *iface; if (us122l->is_us144) { iface = usb_ifnum_to_if(us122l->dev, 0); usb_autopm_put_interface(iface); } iface = usb_ifnum_to_if(us122l->dev, 1); usb_autopm_put_interface(iface); if (us122l->first == file) us122l->first = NULL; guard(mutex)(&us122l->mutex); if (us122l->master == file) us122l->master = us122l->slave; us122l->slave = NULL; return 0; } static int usb_stream_hwdep_mmap(struct snd_hwdep *hw, struct file *filp, struct vm_area_struct *area) { unsigned long size = area->vm_end - area->vm_start; struct us122l *us122l = hw->private_data; unsigned long offset; struct usb_stream *s; bool read; offset = area->vm_pgoff << PAGE_SHIFT; guard(mutex)(&us122l->mutex); s = us122l->sk.s; read = offset < s->read_size; if (read && 
area->vm_flags & VM_WRITE) return -EPERM; /* if userspace tries to mmap beyond end of our buffer, fail */ if (size > PAGE_ALIGN(read ? s->read_size : s->write_size)) { dev_warn(hw->card->dev, "%s: size %lu > %u\n", __func__, size, read ? s->read_size : s->write_size); return -EINVAL; } area->vm_ops = &usb_stream_hwdep_vm_ops; vm_flags_set(area, VM_DONTDUMP); if (!read) vm_flags_set(area, VM_DONTEXPAND); area->vm_private_data = us122l; return 0; } static __poll_t usb_stream_hwdep_poll(struct snd_hwdep *hw, struct file *file, poll_table *wait) { struct us122l *us122l = hw->private_data; unsigned int *polled; __poll_t mask; poll_wait(file, &us122l->sk.sleep, wait); mask = EPOLLIN | EPOLLOUT | EPOLLWRNORM | EPOLLERR; if (mutex_trylock(&us122l->mutex)) { struct usb_stream *s = us122l->sk.s; if (s && s->state == usb_stream_ready) { if (us122l->first == file) polled = &s->periods_polled; else polled = &us122l->second_periods_polled; if (*polled != s->periods_done) { *polled = s->periods_done; mask = EPOLLIN | EPOLLOUT | EPOLLWRNORM; } else { mask = 0; } } mutex_unlock(&us122l->mutex); } return mask; } static void us122l_stop(struct us122l *us122l) { struct list_head *p; list_for_each(p, &us122l->midi_list) snd_usbmidi_input_stop(p); usb_stream_stop(&us122l->sk); usb_stream_free(&us122l->sk); } static int us122l_set_sample_rate(struct usb_device *dev, int rate) { unsigned int ep = 0x81; unsigned char data[3]; int err; data[0] = rate; data[1] = rate >> 8; data[2] = rate >> 16; err = usb_control_msg_send(dev, 0, UAC_SET_CUR, USB_TYPE_CLASS | USB_RECIP_ENDPOINT | USB_DIR_OUT, UAC_EP_CS_ATTR_SAMPLE_RATE << 8, ep, data, 3, 1000, GFP_NOIO); if (err) dev_err(&dev->dev, "%d: cannot set freq %d to ep 0x%x\n", dev->devnum, rate, ep); return err; } static bool us122l_start(struct us122l *us122l, unsigned int rate, unsigned int period_frames) { struct list_head *p; int err; unsigned int use_packsize = 0; bool success = false; if (us122l->dev->speed == USB_SPEED_HIGH) { /* The us-122l's descriptor defaults to iso max_packsize 78, which isn't needed for samplerates <= 48000. 
Lets save some memory: */ switch (rate) { case 44100: use_packsize = 36; break; case 48000: use_packsize = 42; break; case 88200: use_packsize = 72; break; } } if (!usb_stream_new(&us122l->sk, us122l->dev, 1, 2, rate, use_packsize, period_frames, 6)) goto out; err = us122l_set_sample_rate(us122l->dev, rate); if (err < 0) { us122l_stop(us122l); dev_err(&us122l->dev->dev, "us122l_set_sample_rate error\n"); goto out; } err = usb_stream_start(&us122l->sk); if (err < 0) { us122l_stop(us122l); dev_err(&us122l->dev->dev, "%s error %i\n", __func__, err); goto out; } list_for_each(p, &us122l->midi_list) snd_usbmidi_input_start(p); success = true; out: return success; } static int usb_stream_hwdep_ioctl(struct snd_hwdep *hw, struct file *file, unsigned int cmd, unsigned long arg) { struct usb_stream_config cfg; struct us122l *us122l = hw->private_data; struct usb_stream *s; unsigned int min_period_frames; int err = 0; bool high_speed; if (cmd != SNDRV_USB_STREAM_IOCTL_SET_PARAMS) return -ENOTTY; if (copy_from_user(&cfg, (void __user *)arg, sizeof(cfg))) return -EFAULT; if (cfg.version != USB_STREAM_INTERFACE_VERSION) return -ENXIO; high_speed = us122l->dev->speed == USB_SPEED_HIGH; if ((cfg.sample_rate != 44100 && cfg.sample_rate != 48000 && (!high_speed || (cfg.sample_rate != 88200 && cfg.sample_rate != 96000))) || cfg.frame_size != 6 || cfg.period_frames > 0x3000) return -EINVAL; switch (cfg.sample_rate) { case 44100: min_period_frames = 48; break; case 48000: min_period_frames = 52; break; default: min_period_frames = 104; break; } if (!high_speed) min_period_frames <<= 1; if (cfg.period_frames < min_period_frames) return -EINVAL; snd_power_wait(hw->card); guard(mutex)(&us122l->mutex); s = us122l->sk.s; if (!us122l->master) { us122l->master = file; } else if (us122l->master != file) { if (!s || memcmp(&cfg, &s->cfg, sizeof(cfg))) { err = -EIO; goto unlock; } us122l->slave = file; } if (!s || memcmp(&cfg, &s->cfg, sizeof(cfg)) || s->state == usb_stream_xrun) { us122l_stop(us122l); if (!us122l_start(us122l, cfg.sample_rate, cfg.period_frames)) err = -EIO; else err = 1; } unlock: wake_up_all(&us122l->sk.sleep); return err; } #define SND_USB_STREAM_ID "USB STREAM" static int usb_stream_hwdep_new(struct snd_card *card) { int err; struct snd_hwdep *hw; struct usb_device *dev = US122L(card)->dev; err = snd_hwdep_new(card, SND_USB_STREAM_ID, 0, &hw); if (err < 0) return err; hw->iface = SNDRV_HWDEP_IFACE_USB_STREAM; hw->private_data = US122L(card); hw->ops.open = usb_stream_hwdep_open; hw->ops.release = usb_stream_hwdep_release; hw->ops.ioctl = usb_stream_hwdep_ioctl; hw->ops.ioctl_compat = usb_stream_hwdep_ioctl; hw->ops.mmap = usb_stream_hwdep_mmap; hw->ops.poll = usb_stream_hwdep_poll; sprintf(hw->name, "/dev/bus/usb/%03d/%03d/hwdeppcm", dev->bus->busnum, dev->devnum); return 0; } static bool us122l_create_card(struct snd_card *card) { int err; struct us122l *us122l = US122L(card); if (us122l->is_us144) { err = usb_set_interface(us122l->dev, 0, 1); if (err) { dev_err(card->dev, "usb_set_interface error\n"); return false; } } err = usb_set_interface(us122l->dev, 1, 1); if (err) { dev_err(card->dev, "usb_set_interface error\n"); return false; } pt_info_set(us122l->dev, 0x11); pt_info_set(us122l->dev, 0x10); if (!us122l_start(us122l, 44100, 256)) return false; if (us122l->is_us144) err = us144_create_usbmidi(card); else err = us122l_create_usbmidi(card); if (err < 0) { dev_err(card->dev, "us122l_create_usbmidi error %i\n", err); goto stop; } err = usb_stream_hwdep_new(card); if (err < 0) { /* release the 
midi resources */ struct list_head *p; list_for_each(p, &us122l->midi_list) snd_usbmidi_disconnect(p); goto stop; } return true; stop: us122l_stop(us122l); return false; } static void snd_us122l_free(struct snd_card *card) { struct us122l *us122l = US122L(card); int index = us122l->card_index; if (index >= 0 && index < SNDRV_CARDS) snd_us122l_card_used[index] = 0; } static int usx2y_create_card(struct usb_device *device, struct usb_interface *intf, struct snd_card **cardp, unsigned long flags) { int dev; struct snd_card *card; int err; for (dev = 0; dev < SNDRV_CARDS; ++dev) if (enable[dev] && !snd_us122l_card_used[dev]) break; if (dev >= SNDRV_CARDS) return -ENODEV; err = snd_card_new(&intf->dev, index[dev], id[dev], THIS_MODULE, sizeof(struct us122l), &card); if (err < 0) return err; snd_us122l_card_used[US122L(card)->card_index = dev] = 1; card->private_free = snd_us122l_free; US122L(card)->dev = device; mutex_init(&US122L(card)->mutex); US122L(card)->sk.dev = device; init_waitqueue_head(&US122L(card)->sk.sleep); US122L(card)->is_us144 = flags & US122L_FLAG_US144; INIT_LIST_HEAD(&US122L(card)->midi_list); strscpy(card->driver, "USB "NAME_ALLCAPS""); sprintf(card->shortname, "TASCAM "NAME_ALLCAPS""); sprintf(card->longname, "%s (%x:%x if %d at %03d/%03d)", card->shortname, le16_to_cpu(device->descriptor.idVendor), le16_to_cpu(device->descriptor.idProduct), 0, US122L(card)->dev->bus->busnum, US122L(card)->dev->devnum ); *cardp = card; return 0; } static int us122l_usb_probe(struct usb_interface *intf, const struct usb_device_id *device_id, struct snd_card **cardp) { struct usb_device *device = interface_to_usbdev(intf); struct snd_card *card; int err; err = usx2y_create_card(device, intf, &card, device_id->driver_info); if (err < 0) return err; if (!us122l_create_card(card)) { snd_card_free(card); return -EINVAL; } err = snd_card_register(card); if (err < 0) { snd_card_free(card); return err; } usb_get_intf(usb_ifnum_to_if(device, 0)); usb_get_dev(device); *cardp = card; return 0; } static int snd_us122l_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *device = interface_to_usbdev(intf); struct snd_card *card; int err; if (id->driver_info & US122L_FLAG_US144 && device->speed == USB_SPEED_HIGH) { dev_err(&device->dev, "disable ehci-hcd to run US-144\n"); return -ENODEV; } if (intf->cur_altsetting->desc.bInterfaceNumber != 1) return 0; err = us122l_usb_probe(usb_get_intf(intf), id, &card); if (err < 0) { usb_put_intf(intf); return err; } usb_set_intfdata(intf, card); return 0; } static void snd_us122l_disconnect(struct usb_interface *intf) { struct snd_card *card; struct us122l *us122l; struct list_head *p; card = usb_get_intfdata(intf); if (!card) return; snd_card_disconnect(card); us122l = US122L(card); scoped_guard(mutex, &us122l->mutex) { us122l_stop(us122l); } /* release the midi resources */ list_for_each(p, &us122l->midi_list) { snd_usbmidi_disconnect(p); } usb_put_intf(usb_ifnum_to_if(us122l->dev, 0)); usb_put_intf(usb_ifnum_to_if(us122l->dev, 1)); usb_put_dev(us122l->dev); snd_card_free_when_closed(card); } static int snd_us122l_suspend(struct usb_interface *intf, pm_message_t message) { struct snd_card *card; struct us122l *us122l; struct list_head *p; card = usb_get_intfdata(intf); if (!card) return 0; snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); us122l = US122L(card); if (!us122l) return 0; list_for_each(p, &us122l->midi_list) snd_usbmidi_input_stop(p); guard(mutex)(&us122l->mutex); usb_stream_stop(&us122l->sk); return 0; } static 
int snd_us122l_resume(struct usb_interface *intf) { struct snd_card *card; struct us122l *us122l; struct list_head *p; int err; card = usb_get_intfdata(intf); if (!card) return 0; us122l = US122L(card); if (!us122l) return 0; guard(mutex)(&us122l->mutex); /* needed, doesn't restart without: */ if (us122l->is_us144) { err = usb_set_interface(us122l->dev, 0, 1); if (err) { dev_err(&us122l->dev->dev, "usb_set_interface error\n"); goto unlock; } } err = usb_set_interface(us122l->dev, 1, 1); if (err) { dev_err(&us122l->dev->dev, "usb_set_interface error\n"); goto unlock; } pt_info_set(us122l->dev, 0x11); pt_info_set(us122l->dev, 0x10); err = us122l_set_sample_rate(us122l->dev, us122l->sk.s->cfg.sample_rate); if (err < 0) { dev_err(&us122l->dev->dev, "us122l_set_sample_rate error\n"); goto unlock; } err = usb_stream_start(&us122l->sk); if (err) goto unlock; list_for_each(p, &us122l->midi_list) snd_usbmidi_input_start(p); unlock: snd_power_change_state(card, SNDRV_CTL_POWER_D0); return err; } static const struct usb_device_id snd_us122l_usb_id_table[] = { { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x0644, .idProduct = USB_ID_US122L }, { /* US-144 only works at USB1.1! Disable module ehci-hcd. */ .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x0644, .idProduct = USB_ID_US144, .driver_info = US122L_FLAG_US144 }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x0644, .idProduct = USB_ID_US122MKII }, { /* terminator */ } }; MODULE_DEVICE_TABLE(usb, snd_us122l_usb_id_table); static struct usb_driver snd_us122l_usb_driver = { .name = "snd-usb-us122l", .probe = snd_us122l_probe, .disconnect = snd_us122l_disconnect, .suspend = snd_us122l_suspend, .resume = snd_us122l_resume, .reset_resume = snd_us122l_resume, .id_table = snd_us122l_usb_id_table, .supports_autosuspend = 1 }; module_usb_driver(snd_us122l_usb_driver);
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system. INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) module.
 *
 * Authors:	Ross Biro
 *		Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Donald Becker, <becker@super.org> * Alan Cox, <alan@lxorguk.ukuu.org.uk> * Richard Underwood * Stefan Becker, <stefanb@yello.ping.de> * Jorge Cwik, <jorge@laser.satlink.net> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * * Fixes: * Alan Cox : Commented a couple of minor bits of surplus code * Alan Cox : Undefining IP_FORWARD doesn't include the code * (just stops a compiler warning). * Alan Cox : Frames with >=MAX_ROUTE record routes, strict routes or loose routes * are junked rather than corrupting things. * Alan Cox : Frames to bad broadcast subnets are dumped * We used to process them non broadcast and * boy could that cause havoc. * Alan Cox : ip_forward sets the free flag on the * new frame it queues. Still crap because * it copies the frame but at least it * doesn't eat memory too. * Alan Cox : Generic queue code and memory fixes. * Fred Van Kempen : IP fragment support (borrowed from NET2E) * Gerhard Koerting: Forward fragmented frames correctly. * Gerhard Koerting: Fixes to my fix of the above 8-). * Gerhard Koerting: IP interface addressing fix. * Linus Torvalds : More robustness checks * Alan Cox : Even more checks: Still not as robust as it ought to be * Alan Cox : Save IP header pointer for later * Alan Cox : ip option setting * Alan Cox : Use ip_tos/ip_ttl settings * Alan Cox : Fragmentation bogosity removed * (Thanks to Mark.Bush@prg.ox.ac.uk) * Dmitry Gorodchanin : Send of a raw packet crash fix. * Alan Cox : Silly ip bug when an overlength * fragment turns up. Now frees the * queue. * Linus Torvalds/ : Memory leakage on fragmentation * Alan Cox : handling. * Gerhard Koerting: Forwarding uses IP priority hints * Teemu Rantanen : Fragment problems. * Alan Cox : General cleanup, comments and reformat * Alan Cox : SNMP statistics * Alan Cox : BSD address rule semantics. Also see * UDP as there is a nasty checksum issue * if you do things the wrong way. * Alan Cox : Always defrag, moved IP_FORWARD to the config.in file * Alan Cox : IP options adjust sk->priority. * Pedro Roque : Fix mtu/length error in ip_forward. * Alan Cox : Avoid ip_chk_addr when possible. * Richard Underwood : IP multicasting. * Alan Cox : Cleaned up multicast handlers. * Alan Cox : RAW sockets demultiplex in the BSD style. * Gunther Mayer : Fix the SNMP reporting typo * Alan Cox : Always in group 224.0.0.1 * Pauline Middelink : Fast ip_checksum update when forwarding * Masquerading support. * Alan Cox : Multicast loopback error for 224.0.0.1 * Alan Cox : IP_MULTICAST_LOOP option. * Alan Cox : Use notifiers. * Bjorn Ekwall : Removed ip_csum (from slhc.c too) * Bjorn Ekwall : Moved ip_fast_csum to ip.h (inline!) * Stefan Becker : Send out ICMP HOST REDIRECT * Arnt Gulbrandsen : ip_build_xmit * Alan Cox : Per socket routing cache * Alan Cox : Fixed routing cache, added header cache. * Alan Cox : Loopback didn't work right in original ip_build_xmit - fixed it. * Alan Cox : Only send ICMP_REDIRECT if src/dest are the same net. * Alan Cox : Incoming IP option handling. * Alan Cox : Set saddr on raw output frames as per BSD. * Alan Cox : Stopped broadcast source route explosions. * Alan Cox : Can disable source routing * Takeshi Sone : Masquerading didn't work. * Dave Bonn,Alan Cox : Faster IP forwarding whenever possible. * Alan Cox : Memory leaks, tramples, misc debugging. 
* Alan Cox : Fixed multicast (by popular demand 8)) * Alan Cox : Fixed forwarding (by even more popular demand 8)) * Alan Cox : Fixed SNMP statistics [I think] * Gerhard Koerting : IP fragmentation forwarding fix * Alan Cox : Device lock against page fault. * Alan Cox : IP_HDRINCL facility. * Werner Almesberger : Zero fragment bug * Alan Cox : RAW IP frame length bug * Alan Cox : Outgoing firewall on build_xmit * A.N.Kuznetsov : IP_OPTIONS support throughout the kernel * Alan Cox : Multicast routing hooks * Jos Vos : Do accounting *before* call_in_firewall * Willy Konynenberg : Transparent proxying support * * To Fix: * IP fragmentation wants rewriting cleanly. The RFC815 algorithm is much more efficient * and could be made very efficient with the addition of some virtual memory hacks to permit * the allocation of a buffer that can then be 'grown' by twiddling page tables. * Output fragmentation wants updating along with the buffer management to use a single * interleaved copy algorithm so that fragmenting has a one copy overhead. Actual packet * output should probably do its own fragmentation at the UDP/RAW layer. TCP shouldn't cause * fragmentation anyway. */ #define pr_fmt(fmt) "IPv4: " fmt #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/net.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/indirect_call_wrapper.h> #include <net/snmp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> #include <linux/skbuff.h> #include <net/sock.h> #include <net/arp.h> #include <net/icmp.h> #include <net/raw.h> #include <net/checksum.h> #include <net/inet_ecn.h> #include <linux/netfilter_ipv4.h> #include <net/xfrm.h> #include <linux/mroute.h> #include <linux/netlink.h> #include <net/dst_metadata.h> #include <net/udp.h> #include <net/tcp.h> /* * Process Router Attention IP option (RFC 2113) */ bool ip_call_ra_chain(struct sk_buff *skb) { struct ip_ra_chain *ra; u8 protocol = ip_hdr(skb)->protocol; struct sock *last = NULL; struct net_device *dev = skb->dev; struct net *net = dev_net(dev); for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) { struct sock *sk = ra->sk; /* If socket is bound to an interface, only report * the packet if it came from that interface. 
*/ if (sk && inet_sk(sk)->inet_num == protocol && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex)) { if (ip_is_fragment(ip_hdr(skb))) { if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN)) return true; } if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) raw_rcv(last, skb2); } last = sk; } } if (last) { raw_rcv(last, skb); return true; } return false; } INDIRECT_CALLABLE_DECLARE(int udp_rcv(struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int tcp_v4_rcv(struct sk_buff *)); void ip_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int protocol) { const struct net_protocol *ipprot; int raw, ret; resubmit: raw = raw_local_deliver(skb, protocol); ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot) { if (!ipprot->no_policy) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY); return; } nf_reset_ct(skb); } ret = INDIRECT_CALL_2(ipprot->handler, tcp_v4_rcv, udp_rcv, skb); if (ret < 0) { protocol = -ret; goto resubmit; } __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); } else { if (!raw) { if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { __IP_INC_STATS(net, IPSTATS_MIB_INUNKNOWNPROTOS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); } kfree_skb_reason(skb, SKB_DROP_REASON_IP_NOPROTO); } else { __IP_INC_STATS(net, IPSTATS_MIB_INDELIVERS); consume_skb(skb); } } } static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) { __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM); return 0; } skb_clear_delivery_time(skb); __skb_pull(skb, skb_network_header_len(skb)); rcu_read_lock(); ip_protocol_deliver_rcu(net, skb, ip_hdr(skb)->protocol); rcu_read_unlock(); return 0; } /* * Deliver IP Packets to the higher protocol layers. */ int ip_local_deliver(struct sk_buff *skb) { /* * Reassemble IP fragments. */ struct net *net = dev_net(skb->dev); if (ip_is_fragment(ip_hdr(skb))) { if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER)) return 0; } return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, net, NULL, skb, skb->dev, NULL, ip_local_deliver_finish); } EXPORT_SYMBOL(ip_local_deliver); static inline enum skb_drop_reason ip_rcv_options(struct sk_buff *skb, struct net_device *dev) { const struct iphdr *iph; struct ip_options *opt; /* It looks as overkill, because not all IP options require packet mangling. But it is the easiest for now, especially taking into account that combination of IP options and running sniffer is extremely rare condition. 
--ANK (980813) */ if (skb_cow(skb, skb_headroom(skb))) { __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INDISCARDS); return SKB_DROP_REASON_NOMEM; } iph = ip_hdr(skb); opt = &(IPCB(skb)->opt); opt->optlen = iph->ihl*4 - sizeof(struct iphdr); if (ip_options_compile(dev_net(dev), opt, skb)) { __IP_INC_STATS(dev_net(dev), IPSTATS_MIB_INHDRERRORS); return SKB_DROP_REASON_IP_INHDR; } if (unlikely(opt->srr)) { struct in_device *in_dev = __in_dev_get_rcu(dev); if (in_dev) { if (!IN_DEV_SOURCE_ROUTE(in_dev)) { if (IN_DEV_LOG_MARTIANS(in_dev)) net_info_ratelimited("source route option %pI4 -> %pI4\n", &iph->saddr, &iph->daddr); return SKB_DROP_REASON_NOT_SPECIFIED; } } if (ip_options_rcv_srr(skb, dev)) return SKB_DROP_REASON_NOT_SPECIFIED; } return SKB_NOT_DROPPED_YET; } static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph, const struct sk_buff *hint) { return hint && !skb_dst(skb) && ip_hdr(hint)->daddr == iph->daddr && ip_hdr(hint)->tos == iph->tos; } static int ip_rcv_finish_core(struct net *net, struct sk_buff *skb, struct net_device *dev, const struct sk_buff *hint) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; int drop_reason; if (ip_can_use_hint(skb, iph, hint)) { drop_reason = ip_route_use_hint(skb, iph->daddr, iph->saddr, ip4h_dscp(iph), dev, hint); if (unlikely(drop_reason)) goto drop_error; } if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) && !skb_dst(skb) && !skb->sk && !ip_is_fragment(iph)) { switch (iph->protocol) { case IPPROTO_TCP: if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) { tcp_v4_early_demux(skb); /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); } break; case IPPROTO_UDP: if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) { drop_reason = udp_v4_early_demux(skb); if (unlikely(drop_reason)) goto drop_error; /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); } break; } } /* * Initialise the virtual path cache for the packet. It describes * how the packet travels inside Linux networking. */ if (!skb_valid_dst(skb)) { drop_reason = ip_route_input_noref(skb, iph->daddr, iph->saddr, ip4h_dscp(iph), dev); if (unlikely(drop_reason)) goto drop_error; } else { struct in_device *in_dev = __in_dev_get_rcu(dev); if (in_dev && IN_DEV_ORCONF(in_dev, NOPOLICY)) IPCB(skb)->flags |= IPSKB_NOPOLICY; } #ifdef CONFIG_IP_ROUTE_CLASSID if (unlikely(skb_dst(skb)->tclassid)) { struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); u32 idx = skb_dst(skb)->tclassid; st[idx&0xFF].o_packets++; st[idx&0xFF].o_bytes += skb->len; st[(idx>>16)&0xFF].i_packets++; st[(idx>>16)&0xFF].i_bytes += skb->len; } #endif if (iph->ihl > 5) { drop_reason = ip_rcv_options(skb, dev); if (drop_reason) goto drop; } rt = skb_rtable(skb); if (rt->rt_type == RTN_MULTICAST) { __IP_UPD_PO_STATS(net, IPSTATS_MIB_INMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) { __IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len); } else if (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) { struct in_device *in_dev = __in_dev_get_rcu(dev); /* RFC 1122 3.3.6: * * When a host sends a datagram to a link-layer broadcast * address, the IP destination address MUST be a legal IP * broadcast or IP multicast address. * * A host SHOULD silently discard a datagram that is received * via a link-layer broadcast (see Section 2.4) but does not * specify an IP multicast or broadcast destination address. 
* * This doesn't explicitly say L2 *broadcast*, but broadcast is * in a way a form of multicast and the most common use case for * this is 802.11 protecting against cross-station spoofing (the * so-called "hole-196" attack) so do it for both. */ if (in_dev && IN_DEV_ORCONF(in_dev, DROP_UNICAST_IN_L2_MULTICAST)) { drop_reason = SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST; goto drop; } } return NET_RX_SUCCESS; drop: kfree_skb_reason(skb, drop_reason); return NET_RX_DROP; drop_error: if (drop_reason == SKB_DROP_REASON_IP_RPFILTER) __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); goto drop; } static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb->dev; int ret; /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing */ skb = l3mdev_ip_rcv(skb); if (!skb) return NET_RX_SUCCESS; ret = ip_rcv_finish_core(net, skb, dev, NULL); if (ret != NET_RX_DROP) ret = dst_input(skb); return ret; } /* * Main IP Receive routine. */ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net) { const struct iphdr *iph; int drop_reason; u32 len; /* When the interface is in promisc. mode, drop all the crap * that it receives, do not try to analyse it. */ if (skb->pkt_type == PACKET_OTHERHOST) { dev_core_stats_rx_otherhost_dropped_inc(skb->dev); drop_reason = SKB_DROP_REASON_OTHERHOST; goto drop; } __IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len); skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) { __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); goto out; } drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto inhdr_error; iph = ip_hdr(skb); /* * RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum. * * Is the datagram acceptable? * * 1. Length at least the size of an ip header * 2. Version of 4 * 3. Checksums correctly. [Speed optimisation for later, skip loopback checksums] * 4. Doesn't have a bogus length */ if (iph->ihl < 5 || iph->version != 4) goto inhdr_error; BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1); BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0); BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE); __IP_ADD_STATS(net, IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK), max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); if (!pskb_may_pull(skb, iph->ihl*4)) goto inhdr_error; iph = ip_hdr(skb); if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) goto csum_error; len = iph_totlen(skb, iph); if (skb->len < len) { drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; __IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) goto inhdr_error; /* Our transport medium may have padded the buffer out. Now we know it * is IP we can trim to the true length of the frame. * Note this now means skb->len holds ntohs(iph->tot_len). */ if (pskb_trim_rcsum(skb, len)) { __IP_INC_STATS(net, IPSTATS_MIB_INDISCARDS); goto drop; } iph = ip_hdr(skb); skb->transport_header = skb->network_header + iph->ihl*4; /* Remove any debris in the socket control block */ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); IPCB(skb)->iif = skb->skb_iif; /* Must drop socket now because of tproxy. 
*/ if (!skb_sk_is_prefetched(skb)) skb_orphan(skb); return skb; csum_error: drop_reason = SKB_DROP_REASON_IP_CSUM; __IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS); inhdr_error: if (drop_reason == SKB_DROP_REASON_NOT_SPECIFIED) drop_reason = SKB_DROP_REASON_IP_INHDR; __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS); drop: kfree_skb_reason(skb, drop_reason); out: return NULL; } /* * IP receive entry point */ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct net *net = dev_net(dev); skb = ip_rcv_core(skb, net); if (skb == NULL) return NET_RX_DROP; return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL, skb, dev, NULL, ip_rcv_finish); } static void ip_sublist_rcv_finish(struct list_head *head) { struct sk_buff *skb, *next; list_for_each_entry_safe(skb, next, head, list) { skb_list_del_init(skb); dst_input(skb); } } static struct sk_buff *ip_extract_route_hint(const struct net *net, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); if (fib4_has_custom_rules(net) || ipv4_is_lbcast(iph->daddr) || ipv4_is_zeronet(iph->daddr) || IPCB(skb)->flags & IPSKB_MULTIPATH) return NULL; return skb; } static void ip_list_rcv_finish(struct net *net, struct list_head *head) { struct sk_buff *skb, *next, *hint = NULL; struct dst_entry *curr_dst = NULL; LIST_HEAD(sublist); list_for_each_entry_safe(skb, next, head, list) { struct net_device *dev = skb->dev; struct dst_entry *dst; skb_list_del_init(skb); /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing */ skb = l3mdev_ip_rcv(skb); if (!skb) continue; if (ip_rcv_finish_core(net, skb, dev, hint) == NET_RX_DROP) continue; dst = skb_dst(skb); if (curr_dst != dst) { hint = ip_extract_route_hint(net, skb); /* dispatch old sublist */ if (!list_empty(&sublist)) ip_sublist_rcv_finish(&sublist); /* start new sublist */ INIT_LIST_HEAD(&sublist); curr_dst = dst; } list_add_tail(&skb->list, &sublist); } /* dispatch final sublist */ ip_sublist_rcv_finish(&sublist); } static void ip_sublist_rcv(struct list_head *head, struct net_device *dev, struct net *net) { NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL, head, dev, NULL, ip_rcv_finish); ip_list_rcv_finish(net, head); } /* Receive a list of IP packets */ void ip_list_rcv(struct list_head *head, struct packet_type *pt, struct net_device *orig_dev) { struct net_device *curr_dev = NULL; struct net *curr_net = NULL; struct sk_buff *skb, *next; LIST_HEAD(sublist); list_for_each_entry_safe(skb, next, head, list) { struct net_device *dev = skb->dev; struct net *net = dev_net(dev); skb_list_del_init(skb); skb = ip_rcv_core(skb, net); if (skb == NULL) continue; if (curr_dev != dev || curr_net != net) { /* dispatch old sublist */ if (!list_empty(&sublist)) ip_sublist_rcv(&sublist, curr_dev, curr_net); /* start new sublist */ INIT_LIST_HEAD(&sublist); curr_dev = dev; curr_net = net; } list_add_tail(&skb->list, &sublist); } /* dispatch final sublist */ if (!list_empty(&sublist)) ip_sublist_rcv(&sublist, curr_dev, curr_net); }
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Crypto API support for SHA-1 and HMAC-SHA1
 *
 * Copyright (c) Alan Smithee.
 * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
 * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
 * Copyright 2025 Google LLC
 */
#include <crypto/internal/hash.h>
#include <crypto/sha1.h>
#include <linux/kernel.h>
#include <linux/module.h>

/*
 * Export and import functions. crypto_shash wants a particular format that
 * matches that used by some legacy drivers. It currently is the same as the
 * library SHA context, except the value in bytecount must be block-aligned and
 * the remainder must be stored in an extra u8 appended to the struct.
 */
#define SHA1_SHASH_STATE_SIZE (sizeof(struct sha1_ctx) + 1)
static_assert(sizeof(struct sha1_ctx) == sizeof(struct sha1_state));
static_assert(offsetof(struct sha1_ctx, state) == offsetof(struct sha1_state, state));
static_assert(offsetof(struct sha1_ctx, bytecount) == offsetof(struct sha1_state, count));
static_assert(offsetof(struct sha1_ctx, buf) == offsetof(struct sha1_state, buffer));

static int __crypto_sha1_export(const struct sha1_ctx *ctx0, void *out)
{
	struct sha1_ctx ctx = *ctx0;
	unsigned int partial;
	u8 *p = out;

	partial = ctx.bytecount % SHA1_BLOCK_SIZE;
	ctx.bytecount -= partial;
	memcpy(p, &ctx, sizeof(ctx));
	p += sizeof(ctx);
	*p = partial;
	return 0;
}

static int __crypto_sha1_import(struct sha1_ctx *ctx, const void *in)
{
	const u8 *p = in;

	memcpy(ctx, p, sizeof(*ctx));
	p += sizeof(*ctx);
	ctx->bytecount += *p;
	return 0;
}

static int __crypto_sha1_export_core(const struct sha1_ctx *ctx, void *out)
{
	memcpy(out, ctx, offsetof(struct sha1_ctx, buf));
	return 0;
}

static int __crypto_sha1_import_core(struct sha1_ctx *ctx, const void *in)
{
	memcpy(ctx, in, offsetof(struct sha1_ctx, buf));
	return 0;
}

const u8 sha1_zero_message_hash[SHA1_DIGEST_SIZE] = {
	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
	0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
	0xaf, 0xd8, 0x07, 0x09
};
EXPORT_SYMBOL_GPL(sha1_zero_message_hash);

#define SHA1_CTX(desc) ((struct sha1_ctx *)shash_desc_ctx(desc))

static int crypto_sha1_init(struct shash_desc *desc)
{
	sha1_init(SHA1_CTX(desc));
	return 0;
}

static int crypto_sha1_update(struct shash_desc *desc,
			      const u8 *data, unsigned int len)
{
	sha1_update(SHA1_CTX(desc), data, len);
	return 0;
}

static int crypto_sha1_final(struct shash_desc *desc, u8 *out)
{
	sha1_final(SHA1_CTX(desc), out);
	return 0;
}

static int crypto_sha1_digest(struct shash_desc *desc,
			      const u8 *data, unsigned int len, u8 *out)
{
	sha1(data, len, out);
	return 0;
}

static int crypto_sha1_export(struct shash_desc *desc, void *out)
{
	return __crypto_sha1_export(SHA1_CTX(desc), out);
}

static int crypto_sha1_import(struct shash_desc *desc, const void *in)
{
	return __crypto_sha1_import(SHA1_CTX(desc), in);
}

static int crypto_sha1_export_core(struct shash_desc *desc, void *out)
{
	return __crypto_sha1_export_core(SHA1_CTX(desc), out);
}

static int crypto_sha1_import_core(struct shash_desc *desc, const void *in)
{
	return __crypto_sha1_import_core(SHA1_CTX(desc), in);
}

#define HMAC_SHA1_KEY(tfm) ((struct hmac_sha1_key *)crypto_shash_ctx(tfm))
#define HMAC_SHA1_CTX(desc) ((struct hmac_sha1_ctx *)shash_desc_ctx(desc))

static int crypto_hmac_sha1_setkey(struct crypto_shash *tfm,
				   const u8 *raw_key, unsigned int keylen)
{
	hmac_sha1_preparekey(HMAC_SHA1_KEY(tfm), raw_key, keylen);
	return 0;
}

static int crypto_hmac_sha1_init(struct shash_desc *desc)
{
	hmac_sha1_init(HMAC_SHA1_CTX(desc), HMAC_SHA1_KEY(desc->tfm));
	return 0;
}

static int crypto_hmac_sha1_update(struct shash_desc *desc,
				   const u8 *data, unsigned int len)
{
	hmac_sha1_update(HMAC_SHA1_CTX(desc), data, len);
	return 0;
}

static int crypto_hmac_sha1_final(struct shash_desc *desc, u8 *out)
{
	hmac_sha1_final(HMAC_SHA1_CTX(desc), out);
	return 0;
}

static int crypto_hmac_sha1_digest(struct shash_desc *desc,
				   const u8 *data, unsigned int len, u8 *out)
{
	hmac_sha1(HMAC_SHA1_KEY(desc->tfm), data, len, out);
	return 0;
}

static int crypto_hmac_sha1_export(struct shash_desc *desc, void *out)
{
	return __crypto_sha1_export(&HMAC_SHA1_CTX(desc)->sha_ctx, out);
}

static int crypto_hmac_sha1_import(struct shash_desc *desc, const void *in)
{
	struct hmac_sha1_ctx *ctx = HMAC_SHA1_CTX(desc);

	ctx->ostate = HMAC_SHA1_KEY(desc->tfm)->ostate;
	return __crypto_sha1_import(&ctx->sha_ctx, in);
}

static int crypto_hmac_sha1_export_core(struct shash_desc *desc, void *out)
{
	return __crypto_sha1_export_core(&HMAC_SHA1_CTX(desc)->sha_ctx, out);
}

static int crypto_hmac_sha1_import_core(struct shash_desc *desc, const void *in)
{
	struct hmac_sha1_ctx *ctx = HMAC_SHA1_CTX(desc);

	ctx->ostate = HMAC_SHA1_KEY(desc->tfm)->ostate;
	return __crypto_sha1_import_core(&ctx->sha_ctx, in);
}

static struct shash_alg algs[] = {
	{
		.base.cra_name = "sha1",
		.base.cra_driver_name = "sha1-lib",
		.base.cra_priority = 300,
		.base.cra_blocksize = SHA1_BLOCK_SIZE,
		.base.cra_module = THIS_MODULE,
		.digestsize = SHA1_DIGEST_SIZE,
		.init = crypto_sha1_init,
		.update = crypto_sha1_update,
		.final = crypto_sha1_final,
		.digest = crypto_sha1_digest,
		.export = crypto_sha1_export,
		.import = crypto_sha1_import,
		.export_core = crypto_sha1_export_core,
		.import_core = crypto_sha1_import_core,
		.descsize = sizeof(struct sha1_ctx),
		.statesize = SHA1_SHASH_STATE_SIZE,
	},
	{
		.base.cra_name = "hmac(sha1)",
		.base.cra_driver_name = "hmac-sha1-lib",
		.base.cra_priority = 300,
		.base.cra_blocksize = SHA1_BLOCK_SIZE,
		.base.cra_ctxsize = sizeof(struct hmac_sha1_key),
		.base.cra_module = THIS_MODULE,
		.digestsize = SHA1_DIGEST_SIZE,
		.setkey = crypto_hmac_sha1_setkey,
		.init = crypto_hmac_sha1_init,
		.update = crypto_hmac_sha1_update,
		.final = crypto_hmac_sha1_final,
		.digest = crypto_hmac_sha1_digest,
		.export = crypto_hmac_sha1_export,
		.import = crypto_hmac_sha1_import,
		.export_core = crypto_hmac_sha1_export_core,
		.import_core = crypto_hmac_sha1_import_core,
		.descsize = sizeof(struct hmac_sha1_ctx),
		.statesize = SHA1_SHASH_STATE_SIZE,
	},
};

static int __init crypto_sha1_mod_init(void)
{
	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}
module_init(crypto_sha1_mod_init);

static void __exit crypto_sha1_mod_exit(void)
{
	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
module_exit(crypto_sha1_mod_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Crypto API support for SHA-1 and HMAC-SHA1");

MODULE_ALIAS_CRYPTO("sha1");
MODULE_ALIAS_CRYPTO("sha1-lib");
MODULE_ALIAS_CRYPTO("hmac(sha1)");
MODULE_ALIAS_CRYPTO("hmac-sha1-lib");
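Editor's note, for orientation only: the listing above registers the "sha1" / "sha1-lib" shash algorithms on top of the SHA-1 library functions. A minimal kernel-context sketch of how such a registered shash is typically consumed is shown below; it is not part of the file above, sha1_selftest_digest() and the "abc" message are made up for illustration, and crypto_alloc_shash()/crypto_shash_tfm_digest() are standard crypto_shash API calls assumed to be available in this kernel.

#include <crypto/hash.h>
#include <crypto/sha1.h>
#include <linux/err.h>

/* Hypothetical caller: hash a short buffer with whichever "sha1"
 * implementation (e.g. "sha1-lib" above) the crypto API selects. */
static int sha1_selftest_digest(u8 digest[SHA1_DIGEST_SIZE])
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("sha1", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* one-shot digest; no shash_desc needed on the caller's side */
	err = crypto_shash_tfm_digest(tfm, (const u8 *)"abc", 3, digest);

	crypto_free_shash(tfm);
	return err;
}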
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * INETPEER - A storage for permanent information about peers
 *
 * Authors: Andrey V. Savochkin <saw@msu.ru>
 */

#ifndef _NET_INETPEER_H
#define _NET_INETPEER_H

#include <linux/types.h>
#include <linux/init.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/rtnetlink.h>
#include <net/ipv6.h>
#include <linux/atomic.h>

/* IPv4 address key for cache lookups */
struct ipv4_addr_key {
	__be32 addr;
	int vif;
};

#define INETPEER_MAXKEYSZ (sizeof(struct in6_addr) / sizeof(u32))

struct inetpeer_addr {
	union {
		struct ipv4_addr_key a4;
		struct in6_addr a6;
		u32 key[INETPEER_MAXKEYSZ];
	};
	__u16 family;
};

struct inet_peer {
	struct rb_node rb_node;
	struct inetpeer_addr daddr;

	u32 metrics[RTAX_MAX];
	u32 rate_tokens;	/* rate limiting for ICMP */
	u32 n_redirects;
	unsigned long rate_last;
	/*
	 * Once inet_peer is queued for deletion (refcnt == 0), following field
	 * is not available: rid
	 * We can share memory with rcu_head to help keep inet_peer small.
	 */
	union {
		struct {
			atomic_t rid;	/* Frag reception counter */
		};
		struct rcu_head rcu;
	};

	/* following fields might be frequently dirtied */
	__u32 dtime;	/* the time of last use of not referenced entries */
	refcount_t refcnt;
};

struct inet_peer_base {
	struct rb_root rb_root;
	seqlock_t lock;
	int total;
};

void inet_peer_base_init(struct inet_peer_base *);

void inet_initpeers(void) __init;

#define INETPEER_METRICS_NEW (~(u32) 0)

static inline void inetpeer_set_addr_v4(struct inetpeer_addr *iaddr, __be32 ip)
{
	iaddr->a4.addr = ip;
	iaddr->a4.vif = 0;
	iaddr->family = AF_INET;
}

static inline __be32 inetpeer_get_addr_v4(struct inetpeer_addr *iaddr)
{
	return iaddr->a4.addr;
}

static inline void inetpeer_set_addr_v6(struct inetpeer_addr *iaddr,
					struct in6_addr *in6)
{
	iaddr->a6 = *in6;
	iaddr->family = AF_INET6;
}

static inline struct in6_addr *inetpeer_get_addr_v6(struct inetpeer_addr *iaddr)
{
	return &iaddr->a6;
}

/* can be called with or without local BH being disabled */
struct inet_peer *inet_getpeer(struct inet_peer_base *base,
			       const struct inetpeer_addr *daddr);

static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base,
						__be32 v4daddr, int vif)
{
	struct inetpeer_addr daddr;

	daddr.a4.addr = v4daddr;
	daddr.a4.vif = vif;
	daddr.family = AF_INET;
	return inet_getpeer(base, &daddr);
}

static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base,
						const struct in6_addr *v6daddr)
{
	struct inetpeer_addr daddr;

	daddr.a6 = *v6daddr;
	daddr.family = AF_INET6;
	return inet_getpeer(base, &daddr);
}

static inline int inetpeer_addr_cmp(const struct inetpeer_addr *a,
				    const struct inetpeer_addr *b)
{
	int i, n;

	if (a->family == AF_INET)
		n = sizeof(a->a4) / sizeof(u32);
	else
		n = sizeof(a->a6) / sizeof(u32);

	for (i = 0; i < n; i++) {
		if (a->key[i] == b->key[i])
			continue;
		if (a->key[i] < b->key[i])
			return -1;
		return 1;
	}

	return 0;
}

/* can be called from BH context or outside */
void inet_putpeer(struct inet_peer *p);
bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);

void inetpeer_invalidate_tree(struct inet_peer_base *);

#endif /* _NET_INETPEER_H */
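Editor's note, for orientation only: a minimal sketch of how the inline helpers declared in this header fit together. inetpeer_example_cmp() below is a made-up caller; the real lookup that orders entries this way is the rb-tree walk in net/ipv4/inetpeer.c.

#include <net/inetpeer.h>

/* Hypothetical caller: build two IPv4 peer keys (vif 0) and order them the
 * same way inetpeer_addr_cmp() orders entries in the peer rb-tree. */
static int inetpeer_example_cmp(__be32 ip_a, __be32 ip_b)
{
	struct inetpeer_addr a, b;

	inetpeer_set_addr_v4(&a, ip_a);
	inetpeer_set_addr_v4(&b, ip_b);

	/* returns -1, 0 or 1, comparing the u32 key words of each address */
	return inetpeer_addr_cmp(&a, &b);
}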
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_ATOMIC64_64_H
#define _ASM_X86_ATOMIC64_64_H

#include <linux/types.h>
#include <asm/alternative.h>
#include <asm/cmpxchg.h>

/* The 64-bit atomic type */

#define ATOMIC64_INIT(i) { (i) }

static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
	return __READ_ONCE((v)->counter);
}

static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
	__WRITE_ONCE(v->counter, i);
}

static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
	asm_inline volatile(LOCK_PREFIX "addq %1, %0"
		: "=m" (v->counter)
		: "er" (i), "m" (v->counter) : "memory");
}

static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
	asm_inline volatile(LOCK_PREFIX "subq %1, %0"
		: "=m" (v->counter)
		: "er" (i), "m" (v->counter) : "memory");
}

static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
{
	return GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, e, "er", i);
}
#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test

static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
	asm_inline volatile(LOCK_PREFIX "incq %0"
		: "=m" (v->counter)
		: "m" (v->counter) : "memory");
}
#define arch_atomic64_inc arch_atomic64_inc

static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
	asm_inline volatile(LOCK_PREFIX "decq %0"
		: "=m" (v->counter)
		: "m" (v->counter) : "memory");
}
#define arch_atomic64_dec arch_atomic64_dec

static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v)
{
	return GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, e);
}
#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test

static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v)
{
	return GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, e);
}
#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test

static __always_inline bool arch_atomic64_add_negative(s64 i, atomic64_t *v)
{
	return GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, s, "er", i);
}
#define arch_atomic64_add_negative arch_atomic64_add_negative

static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
	return i + xadd(&v->counter, i);
}
#define arch_atomic64_add_return arch_atomic64_add_return

#define arch_atomic64_sub_return(i, v) arch_atomic64_add_return(-(i), v)

static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
	return xadd(&v->counter, i);
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add

#define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), v)

static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
	return arch_cmpxchg(&v->counter, old, new);
}
#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg

static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
{
	return arch_try_cmpxchg(&v->counter, old, new);
}
#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg

static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new)
{
	return arch_xchg(&v->counter, new);
}
#define arch_atomic64_xchg arch_atomic64_xchg

static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
	asm_inline volatile(LOCK_PREFIX "andq %1, %0"
		: "+m" (v->counter)
		: "er" (i) : "memory");
}

static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
	s64 val = arch_atomic64_read(v);

	do {
	} while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
	return val;
}
#define arch_atomic64_fetch_and arch_atomic64_fetch_and

static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
	asm_inline volatile(LOCK_PREFIX "orq %1, %0"
		: "+m" (v->counter)
		: "er" (i) : "memory");
}

static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
	s64 val = arch_atomic64_read(v);

	do {
	} while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
	return val;
}
#define arch_atomic64_fetch_or arch_atomic64_fetch_or

static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
	asm_inline volatile(LOCK_PREFIX "xorq %1, %0"
		: "+m" (v->counter)
		: "er" (i) : "memory");
}

static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
	s64 val = arch_atomic64_read(v);

	do {
	} while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
	return val;
}
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor

#endif /* _ASM_X86_ATOMIC64_64_H */
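Editor's note, for orientation only: kernel code does not normally call the arch_atomic64_* primitives above directly; it goes through the generic atomic64_* wrappers, which expand to these implementations on x86-64. A small made-up sketch, assuming a kernel context (example_bytes_seen and example_account() are illustrative names):

#include <linux/atomic.h>

static atomic64_t example_bytes_seen = ATOMIC64_INIT(0);

/* Hypothetical accounting helper: atomic64_add() maps to the LOCK ADDQ in
 * arch_atomic64_add(), atomic64_read() to the plain __READ_ONCE() load. */
static bool example_account(s64 len)
{
	atomic64_add(len, &example_bytes_seen);
	return atomic64_read(&example_bytes_seen) > (1LL << 30);
}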
2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 
3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 
3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 
4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. 
*/ #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/radix-tree.h> #include <linux/writeback.h> #include <linux/workqueue.h> #include <linux/kthread.h> #include <linux/slab.h> #include <linux/migrate.h> #include <linux/ratelimit.h> #include <linux/uuid.h> #include <linux/semaphore.h> #include <linux/error-injection.h> #include <linux/crc32c.h> #include <linux/sched/mm.h> #include <linux/unaligned.h> #include <crypto/hash.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" #include "bio.h" #include "print-tree.h" #include "locking.h" #include "tree-log.h" #include "free-space-cache.h" #include "free-space-tree.h" #include "dev-replace.h" #include "raid56.h" #include "sysfs.h" #include "qgroup.h" #include "compression.h" #include "tree-checker.h" #include "ref-verify.h" #include "block-group.h" #include "discard.h" #include "space-info.h" #include "zoned.h" #include "subpage.h" #include "fs.h" #include "accessors.h" #include "extent-tree.h" #include "root-tree.h" #include "defrag.h" #include "uuid-tree.h" #include "relocation.h" #include "scrub.h" #include "super.h" #include "delayed-inode.h" #define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\ BTRFS_HEADER_FLAG_RELOC |\ BTRFS_SUPER_FLAG_ERROR |\ BTRFS_SUPER_FLAG_SEEDING |\ BTRFS_SUPER_FLAG_METADUMP |\ BTRFS_SUPER_FLAG_METADUMP_V2) static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info); static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info); static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info) { if (fs_info->csum_shash) crypto_free_shash(fs_info->csum_shash); } /* * Compute the csum of a btree block and store the result to provided buffer. */ static void csum_tree_block(struct extent_buffer *buf, u8 *result) { struct btrfs_fs_info *fs_info = buf->fs_info; int num_pages; u32 first_page_part; SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); char *kaddr; int i; shash->tfm = fs_info->csum_shash; crypto_shash_init(shash); if (buf->addr) { /* Pages are contiguous, handle them as a big one. */ kaddr = buf->addr; first_page_part = fs_info->nodesize; num_pages = 1; } else { kaddr = folio_address(buf->folios[0]); first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize); num_pages = num_extent_pages(buf); } crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE, first_page_part - BTRFS_CSUM_SIZE); /* * Multiple single-page folios case would reach here. * * nodesize <= PAGE_SIZE and large folio all handled by above * crypto_shash_update() already. */ for (i = 1; i < num_pages && INLINE_EXTENT_BUFFER_PAGES > 1; i++) { kaddr = folio_address(buf->folios[i]); crypto_shash_update(shash, kaddr, PAGE_SIZE); } memset(result, 0, BTRFS_CSUM_SIZE); crypto_shash_final(shash, result); } /* * we can't consider a given block up to date unless the transid of the * block matches the transid in the parent node's pointer. This is how we * detect blocks that either didn't get written at all or got written * in the wrong place. 
*/ int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, bool atomic) { if (!extent_buffer_uptodate(eb)) return 0; if (!parent_transid || btrfs_header_generation(eb) == parent_transid) return 1; if (atomic) return -EAGAIN; if (!extent_buffer_uptodate(eb) || btrfs_header_generation(eb) != parent_transid) { btrfs_err_rl(eb->fs_info, "parent transid verify failed on logical %llu mirror %u wanted %llu found %llu", eb->start, eb->read_mirror, parent_transid, btrfs_header_generation(eb)); clear_extent_buffer_uptodate(eb); return 0; } return 1; } static bool btrfs_supported_super_csum(u16 csum_type) { switch (csum_type) { case BTRFS_CSUM_TYPE_CRC32: case BTRFS_CSUM_TYPE_XXHASH: case BTRFS_CSUM_TYPE_SHA256: case BTRFS_CSUM_TYPE_BLAKE2: return true; default: return false; } } /* * Return 0 if the superblock checksum type matches the checksum value of that * algorithm. Pass the raw disk superblock data. */ int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, const struct btrfs_super_block *disk_sb) { char result[BTRFS_CSUM_SIZE]; SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); shash->tfm = fs_info->csum_shash; /* * The super_block structure does not span the whole * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is * filled with zeros and is included in the checksum. */ crypto_shash_digest(shash, (const u8 *)disk_sb + BTRFS_CSUM_SIZE, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result); if (memcmp(disk_sb->csum, result, fs_info->csum_size)) return 1; return 0; } static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num) { struct btrfs_fs_info *fs_info = eb->fs_info; const u32 step = min(fs_info->nodesize, PAGE_SIZE); const u32 nr_steps = eb->len / step; phys_addr_t paddrs[BTRFS_MAX_BLOCKSIZE / PAGE_SIZE]; int ret = 0; if (sb_rdonly(fs_info->sb)) return -EROFS; for (int i = 0; i < num_extent_pages(eb); i++) { struct folio *folio = eb->folios[i]; /* No large folio support yet. */ ASSERT(folio_order(folio) == 0); ASSERT(i < nr_steps); /* * For nodesize < page size, there is just one paddr, with some * offset inside the page. * * For nodesize >= page size, it's one or more paddrs, and eb->start * must be aligned to page boundary. */ paddrs[i] = page_to_phys(&folio->page) + offset_in_page(eb->start); } ret = btrfs_repair_io_failure(fs_info, 0, eb->start, eb->len, eb->start, paddrs, step, mirror_num); return ret; } /* * helper to read a given tree block, doing retries as required when * the checksums don't match and we have alternate mirrors to try. * * @check: expected tree parentness check, see the comments of the * structure for details. */ int btrfs_read_extent_buffer(struct extent_buffer *eb, const struct btrfs_tree_parent_check *check) { struct btrfs_fs_info *fs_info = eb->fs_info; int failed = 0; int ret; int num_copies = 0; int mirror_num = 0; int failed_mirror = 0; ASSERT(check); while (1) { ret = read_extent_buffer_pages(eb, mirror_num, check); if (!ret) break; num_copies = btrfs_num_copies(fs_info, eb->start, eb->len); if (num_copies == 1) break; if (!failed_mirror) { failed = 1; failed_mirror = eb->read_mirror; } mirror_num++; if (mirror_num == failed_mirror) mirror_num++; if (mirror_num > num_copies) break; } if (failed && !ret && failed_mirror) btrfs_repair_eb_io_failure(eb, failed_mirror); return ret; } /* * Checksum a dirty tree block before IO. 
*/ int btree_csum_one_bio(struct btrfs_bio *bbio) { struct extent_buffer *eb = bbio->private; struct btrfs_fs_info *fs_info = eb->fs_info; u64 found_start = btrfs_header_bytenr(eb); u64 last_trans; u8 result[BTRFS_CSUM_SIZE]; int ret; /* Btree blocks are always contiguous on disk. */ if (WARN_ON_ONCE(bbio->file_offset != eb->start)) return -EIO; if (WARN_ON_ONCE(bbio->bio.bi_iter.bi_size != eb->len)) return -EIO; /* * If an extent_buffer is marked as EXTENT_BUFFER_ZONED_ZEROOUT, don't * checksum it but zero-out its content. This is done to preserve * ordering of I/O without unnecessarily writing out data. */ if (test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags)) { memzero_extent_buffer(eb, 0, eb->len); return 0; } if (WARN_ON_ONCE(found_start != eb->start)) return -EIO; if (WARN_ON(!btrfs_meta_folio_test_uptodate(eb->folios[0], eb))) return -EIO; ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid, offsetof(struct btrfs_header, fsid), BTRFS_FSID_SIZE) == 0); csum_tree_block(eb, result); if (btrfs_header_level(eb)) ret = btrfs_check_node(eb); else ret = btrfs_check_leaf(eb); if (ret < 0) goto error; /* * Also check the generation, the eb reached here must be newer than * last committed. Or something seriously wrong happened. */ last_trans = btrfs_get_last_trans_committed(fs_info); if (unlikely(btrfs_header_generation(eb) <= last_trans)) { ret = -EUCLEAN; btrfs_err(fs_info, "block=%llu bad generation, have %llu expect > %llu", eb->start, btrfs_header_generation(eb), last_trans); goto error; } write_extent_buffer(eb, result, 0, fs_info->csum_size); return 0; error: btrfs_print_tree(eb, 0); btrfs_err(fs_info, "block=%llu write time tree block corruption detected", eb->start); /* * Be noisy if this is an extent buffer from a log tree. We don't abort * a transaction in case there's a bad log tree extent buffer, we just * fallback to a transaction commit. Still we want to know when there is * a bad log tree extent buffer, as that may signal a bug somewhere. */ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) || btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID); return ret; } static bool check_tree_block_fsid(struct extent_buffer *eb) { struct btrfs_fs_info *fs_info = eb->fs_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs; u8 fsid[BTRFS_FSID_SIZE]; read_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid), BTRFS_FSID_SIZE); /* * alloc_fsid_devices() copies the fsid into fs_devices::metadata_uuid. * This is then overwritten by metadata_uuid if it is present in the * device_list_add(). The same true for a seed device as well. So use of * fs_devices::metadata_uuid is appropriate here. 
*/ if (memcmp(fsid, fs_info->fs_devices->metadata_uuid, BTRFS_FSID_SIZE) == 0) return false; list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list) if (!memcmp(fsid, seed_devs->fsid, BTRFS_FSID_SIZE)) return false; return true; } /* Do basic extent buffer checks at read time */ int btrfs_validate_extent_buffer(struct extent_buffer *eb, const struct btrfs_tree_parent_check *check) { struct btrfs_fs_info *fs_info = eb->fs_info; u64 found_start; const u32 csum_size = fs_info->csum_size; u8 found_level; u8 result[BTRFS_CSUM_SIZE]; const u8 *header_csum; int ret = 0; const bool ignore_csum = btrfs_test_opt(fs_info, IGNOREMETACSUMS); ASSERT(check); found_start = btrfs_header_bytenr(eb); if (unlikely(found_start != eb->start)) { btrfs_err_rl(fs_info, "bad tree block start, mirror %u want %llu have %llu", eb->read_mirror, eb->start, found_start); ret = -EIO; goto out; } if (unlikely(check_tree_block_fsid(eb))) { btrfs_err_rl(fs_info, "bad fsid on logical %llu mirror %u", eb->start, eb->read_mirror); ret = -EIO; goto out; } found_level = btrfs_header_level(eb); if (unlikely(found_level >= BTRFS_MAX_LEVEL)) { btrfs_err(fs_info, "bad tree block level, mirror %u level %d on logical %llu", eb->read_mirror, btrfs_header_level(eb), eb->start); ret = -EIO; goto out; } csum_tree_block(eb, result); header_csum = folio_address(eb->folios[0]) + get_eb_offset_in_folio(eb, offsetof(struct btrfs_header, csum)); if (memcmp(result, header_csum, csum_size) != 0) { btrfs_warn_rl(fs_info, "checksum verify failed on logical %llu mirror %u wanted " BTRFS_CSUM_FMT " found " BTRFS_CSUM_FMT " level %d%s", eb->start, eb->read_mirror, BTRFS_CSUM_FMT_VALUE(csum_size, header_csum), BTRFS_CSUM_FMT_VALUE(csum_size, result), btrfs_header_level(eb), ignore_csum ? ", ignored" : ""); if (unlikely(!ignore_csum)) { ret = -EUCLEAN; goto out; } } if (unlikely(found_level != check->level)) { btrfs_err(fs_info, "level verify failed on logical %llu mirror %u wanted %u found %u", eb->start, eb->read_mirror, check->level, found_level); ret = -EIO; goto out; } if (unlikely(check->transid && btrfs_header_generation(eb) != check->transid)) { btrfs_err_rl(eb->fs_info, "parent transid verify failed on logical %llu mirror %u wanted %llu found %llu", eb->start, eb->read_mirror, check->transid, btrfs_header_generation(eb)); ret = -EIO; goto out; } if (check->has_first_key) { const struct btrfs_key *expect_key = &check->first_key; struct btrfs_key found_key; if (found_level) btrfs_node_key_to_cpu(eb, &found_key, 0); else btrfs_item_key_to_cpu(eb, &found_key, 0); if (unlikely(btrfs_comp_cpu_keys(expect_key, &found_key))) { btrfs_err(fs_info, "tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)", eb->start, check->transid, expect_key->objectid, expect_key->type, expect_key->offset, found_key.objectid, found_key.type, found_key.offset); ret = -EUCLEAN; goto out; } } if (check->owner_root) { ret = btrfs_check_eb_owner(eb, check->owner_root); if (ret < 0) goto out; } /* If this is a leaf block and it is corrupt, just return -EIO. 
*/ if (found_level == 0 && btrfs_check_leaf(eb)) ret = -EIO; if (found_level > 0 && btrfs_check_node(eb)) ret = -EIO; if (ret) btrfs_err(fs_info, "read time tree block corruption detected on logical %llu mirror %u", eb->start, eb->read_mirror); out: return ret; } #ifdef CONFIG_MIGRATION static int btree_migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode) { /* * we can't safely write a btree page from here, * we haven't done the locking hook */ if (folio_test_dirty(src)) return -EAGAIN; /* * Buffers may be managed in a filesystem specific way. * We must have no buffers or drop them. */ if (folio_get_private(src) && !filemap_release_folio(src, GFP_KERNEL)) return -EAGAIN; return migrate_folio(mapping, dst, src, mode); } #else #define btree_migrate_folio NULL #endif static int btree_writepages(struct address_space *mapping, struct writeback_control *wbc) { int ret; if (wbc->sync_mode == WB_SYNC_NONE) { struct btrfs_fs_info *fs_info; if (wbc->for_kupdate) return 0; fs_info = inode_to_fs_info(mapping->host); /* this is a bit racy, but that's ok */ ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes, BTRFS_DIRTY_METADATA_THRESH, fs_info->dirty_metadata_batch); if (ret < 0) return 0; } return btree_write_cache_pages(mapping, wbc); } static bool btree_release_folio(struct folio *folio, gfp_t gfp_flags) { if (folio_test_writeback(folio) || folio_test_dirty(folio)) return false; return try_release_extent_buffer(folio); } static void btree_invalidate_folio(struct folio *folio, size_t offset, size_t length) { struct extent_io_tree *tree; tree = &folio_to_inode(folio)->io_tree; extent_invalidate_folio(tree, folio, offset); btree_release_folio(folio, GFP_NOFS); if (folio_get_private(folio)) { btrfs_warn(folio_to_fs_info(folio), "folio private not zero on folio %llu", (unsigned long long)folio_pos(folio)); folio_detach_private(folio); } } #ifdef DEBUG static bool btree_dirty_folio(struct address_space *mapping, struct folio *folio) { struct btrfs_fs_info *fs_info = inode_to_fs_info(mapping->host); struct btrfs_subpage_info *spi = fs_info->subpage_info; struct btrfs_subpage *subpage; struct extent_buffer *eb; int cur_bit = 0; u64 page_start = folio_pos(folio); if (fs_info->sectorsize == PAGE_SIZE) { eb = folio_get_private(folio); BUG_ON(!eb); BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); BUG_ON(!atomic_read(&eb->refs)); btrfs_assert_tree_write_locked(eb); return filemap_dirty_folio(mapping, folio); } ASSERT(spi); subpage = folio_get_private(folio); for (cur_bit = spi->dirty_offset; cur_bit < spi->dirty_offset + spi->bitmap_nr_bits; cur_bit++) { unsigned long flags; u64 cur; spin_lock_irqsave(&subpage->lock, flags); if (!test_bit(cur_bit, subpage->bitmaps)) { spin_unlock_irqrestore(&subpage->lock, flags); continue; } spin_unlock_irqrestore(&subpage->lock, flags); cur = page_start + cur_bit * fs_info->sectorsize; eb = find_extent_buffer(fs_info, cur); ASSERT(eb); ASSERT(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); ASSERT(atomic_read(&eb->refs)); btrfs_assert_tree_write_locked(eb); free_extent_buffer(eb); cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits) - 1; } return filemap_dirty_folio(mapping, folio); } #else #define btree_dirty_folio filemap_dirty_folio #endif static const struct address_space_operations btree_aops = { .writepages = btree_writepages, .release_folio = btree_release_folio, .invalidate_folio = btree_invalidate_folio, .migrate_folio = btree_migrate_folio, .dirty_folio = btree_dirty_folio, }; struct extent_buffer 
*btrfs_find_create_tree_block( struct btrfs_fs_info *fs_info, u64 bytenr, u64 owner_root, int level) { if (btrfs_is_testing(fs_info)) return alloc_test_extent_buffer(fs_info, bytenr); return alloc_extent_buffer(fs_info, bytenr, owner_root, level); } /* * Read tree block at logical address @bytenr and do variant basic but critical * verification. * * @check: expected tree parentness check, see comments of the * structure for details. */ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, struct btrfs_tree_parent_check *check) { struct extent_buffer *buf = NULL; int ret; ASSERT(check); buf = btrfs_find_create_tree_block(fs_info, bytenr, check->owner_root, check->level); if (IS_ERR(buf)) return buf; ret = btrfs_read_extent_buffer(buf, check); if (ret) { free_extent_buffer_stale(buf); return ERR_PTR(ret); } return buf; } static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info, u64 objectid, gfp_t flags) { struct btrfs_root *root; root = kzalloc(sizeof(*root), flags); if (!root) return NULL; memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); root->fs_info = fs_info; root->root_key.objectid = objectid; root->node = NULL; root->commit_root = NULL; root->state = 0; RB_CLEAR_NODE(&root->rb_node); btrfs_set_root_last_trans(root, 0); root->free_objectid = 0; root->nr_delalloc_inodes = 0; root->nr_ordered_extents = 0; xa_init(&root->inodes); xa_init(&root->delayed_nodes); btrfs_init_root_block_rsv(root); INIT_LIST_HEAD(&root->dirty_list); INIT_LIST_HEAD(&root->root_list); INIT_LIST_HEAD(&root->delalloc_inodes); INIT_LIST_HEAD(&root->delalloc_root); INIT_LIST_HEAD(&root->ordered_extents); INIT_LIST_HEAD(&root->ordered_root); INIT_LIST_HEAD(&root->reloc_dirty_list); spin_lock_init(&root->delalloc_lock); spin_lock_init(&root->ordered_extent_lock); spin_lock_init(&root->accounting_lock); spin_lock_init(&root->qgroup_meta_rsv_lock); mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); mutex_init(&root->ordered_extent_mutex); mutex_init(&root->delalloc_mutex); init_waitqueue_head(&root->qgroup_flush_wait); init_waitqueue_head(&root->log_writer_wait); init_waitqueue_head(&root->log_commit_wait[0]); init_waitqueue_head(&root->log_commit_wait[1]); INIT_LIST_HEAD(&root->log_ctxs[0]); INIT_LIST_HEAD(&root->log_ctxs[1]); atomic_set(&root->log_commit[0], 0); atomic_set(&root->log_commit[1], 0); atomic_set(&root->log_writers, 0); atomic_set(&root->log_batch, 0); refcount_set(&root->refs, 1); atomic_set(&root->snapshot_force_cow, 0); atomic_set(&root->nr_swapfiles, 0); btrfs_set_root_log_transid(root, 0); root->log_transid_committed = -1; btrfs_set_root_last_log_commit(root, 0); root->anon_dev = 0; if (!btrfs_is_testing(fs_info)) { btrfs_extent_io_tree_init(fs_info, &root->dirty_log_pages, IO_TREE_ROOT_DIRTY_LOG_PAGES); btrfs_extent_io_tree_init(fs_info, &root->log_csum_range, IO_TREE_LOG_CSUM_RANGE); } spin_lock_init(&root->root_item_lock); btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks); #ifdef CONFIG_BTRFS_DEBUG INIT_LIST_HEAD(&root->leak_list); spin_lock(&fs_info->fs_roots_radix_lock); list_add_tail(&root->leak_list, &fs_info->allocated_roots); spin_unlock(&fs_info->fs_roots_radix_lock); #endif return root; } #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS /* Should only be used by the testing infrastructure */ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info) { struct btrfs_root *root; if (!fs_info) return 
ERR_PTR(-EINVAL); root = btrfs_alloc_root(fs_info, BTRFS_ROOT_TREE_OBJECTID, GFP_KERNEL); if (!root) return ERR_PTR(-ENOMEM); /* We don't use the stripesize in selftest, set it as sectorsize */ root->alloc_bytenr = 0; return root; } #endif static int global_root_cmp(struct rb_node *a_node, const struct rb_node *b_node) { const struct btrfs_root *a = rb_entry(a_node, struct btrfs_root, rb_node); const struct btrfs_root *b = rb_entry(b_node, struct btrfs_root, rb_node); return btrfs_comp_cpu_keys(&a->root_key, &b->root_key); } static int global_root_key_cmp(const void *k, const struct rb_node *node) { const struct btrfs_key *key = k; const struct btrfs_root *root = rb_entry(node, struct btrfs_root, rb_node); return btrfs_comp_cpu_keys(key, &root->root_key); } int btrfs_global_root_insert(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; struct rb_node *tmp; int ret = 0; write_lock(&fs_info->global_root_lock); tmp = rb_find_add(&root->rb_node, &fs_info->global_root_tree, global_root_cmp); write_unlock(&fs_info->global_root_lock); if (tmp) { ret = -EEXIST; btrfs_warn(fs_info, "global root %llu %llu already exists", btrfs_root_id(root), root->root_key.offset); } return ret; } void btrfs_global_root_delete(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; write_lock(&fs_info->global_root_lock); rb_erase(&root->rb_node, &fs_info->global_root_tree); write_unlock(&fs_info->global_root_lock); } struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info, struct btrfs_key *key) { struct rb_node *node; struct btrfs_root *root = NULL; read_lock(&fs_info->global_root_lock); node = rb_find(key, &fs_info->global_root_tree, global_root_key_cmp); if (node) root = container_of(node, struct btrfs_root, rb_node); read_unlock(&fs_info->global_root_lock); return root; } static u64 btrfs_global_root_id(struct btrfs_fs_info *fs_info, u64 bytenr) { struct btrfs_block_group *block_group; u64 ret; if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) return 0; if (bytenr) block_group = btrfs_lookup_block_group(fs_info, bytenr); else block_group = btrfs_lookup_first_block_group(fs_info, bytenr); ASSERT(block_group); if (!block_group) return 0; ret = block_group->global_root_id; btrfs_put_block_group(block_group); return ret; } struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr) { struct btrfs_key key = { .objectid = BTRFS_CSUM_TREE_OBJECTID, .type = BTRFS_ROOT_ITEM_KEY, .offset = btrfs_global_root_id(fs_info, bytenr), }; return btrfs_global_root(fs_info, &key); } struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr) { struct btrfs_key key = { .objectid = BTRFS_EXTENT_TREE_OBJECTID, .type = BTRFS_ROOT_ITEM_KEY, .offset = btrfs_global_root_id(fs_info, bytenr), }; return btrfs_global_root(fs_info, &key); } struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, u64 objectid) { struct btrfs_fs_info *fs_info = trans->fs_info; struct extent_buffer *leaf; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *root; struct btrfs_key key; unsigned int nofs_flag; int ret = 0; /* * We're holding a transaction handle, so use a NOFS memory allocation * context to avoid deadlock if reclaim happens. 
*/ nofs_flag = memalloc_nofs_save(); root = btrfs_alloc_root(fs_info, objectid, GFP_KERNEL); memalloc_nofs_restore(nofs_flag); if (!root) return ERR_PTR(-ENOMEM); root->root_key.objectid = objectid; root->root_key.type = BTRFS_ROOT_ITEM_KEY; root->root_key.offset = 0; leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0, 0, BTRFS_NESTING_NORMAL); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); leaf = NULL; goto fail; } root->node = leaf; btrfs_mark_buffer_dirty(trans, leaf); root->commit_root = btrfs_root_node(root); set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); btrfs_set_root_flags(&root->root_item, 0); btrfs_set_root_limit(&root->root_item, 0); btrfs_set_root_bytenr(&root->root_item, leaf->start); btrfs_set_root_generation(&root->root_item, trans->transid); btrfs_set_root_level(&root->root_item, 0); btrfs_set_root_refs(&root->root_item, 1); btrfs_set_root_used(&root->root_item, leaf->len); btrfs_set_root_last_snapshot(&root->root_item, 0); btrfs_set_root_dirid(&root->root_item, 0); if (btrfs_is_fstree(objectid)) generate_random_guid(root->root_item.uuid); else export_guid(root->root_item.uuid, &guid_null); btrfs_set_root_drop_level(&root->root_item, 0); btrfs_tree_unlock(leaf); key.objectid = objectid; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = 0; ret = btrfs_insert_root(trans, tree_root, &key, &root->root_item); if (ret) goto fail; return root; fail: btrfs_put_root(root); return ERR_PTR(ret); } static struct btrfs_root *alloc_log_tree(struct btrfs_fs_info *fs_info) { struct btrfs_root *root; root = btrfs_alloc_root(fs_info, BTRFS_TREE_LOG_OBJECTID, GFP_NOFS); if (!root) return ERR_PTR(-ENOMEM); root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; root->root_key.type = BTRFS_ROOT_ITEM_KEY; root->root_key.offset = BTRFS_TREE_LOG_OBJECTID; return root; } int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct extent_buffer *leaf; /* * DON'T set SHAREABLE bit for log trees. * * Log trees are not exposed to user space thus can't be snapshotted, * and they go away before a real commit is actually done. * * They do store pointers to file data extents, and those reference * counts still get updated (along with back refs to the log tree). 
*/ leaf = btrfs_alloc_tree_block(trans, root, 0, BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0, 0, BTRFS_NESTING_NORMAL); if (IS_ERR(leaf)) return PTR_ERR(leaf); root->node = leaf; btrfs_mark_buffer_dirty(trans, root->node); btrfs_tree_unlock(root->node); return 0; } int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { struct btrfs_root *log_root; log_root = alloc_log_tree(fs_info); if (IS_ERR(log_root)) return PTR_ERR(log_root); if (!btrfs_is_zoned(fs_info)) { int ret = btrfs_alloc_log_tree_node(trans, log_root); if (ret) { btrfs_put_root(log_root); return ret; } } WARN_ON(fs_info->log_root_tree); fs_info->log_root_tree = log_root; return 0; } int btrfs_add_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_root *log_root; struct btrfs_inode_item *inode_item; int ret; log_root = alloc_log_tree(fs_info); if (IS_ERR(log_root)) return PTR_ERR(log_root); ret = btrfs_alloc_log_tree_node(trans, log_root); if (ret) { btrfs_put_root(log_root); return ret; } btrfs_set_root_last_trans(log_root, trans->transid); log_root->root_key.offset = btrfs_root_id(root); inode_item = &log_root->root_item.inode; btrfs_set_stack_inode_generation(inode_item, 1); btrfs_set_stack_inode_size(inode_item, 3); btrfs_set_stack_inode_nlink(inode_item, 1); btrfs_set_stack_inode_nbytes(inode_item, fs_info->nodesize); btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); btrfs_set_root_node(&log_root->root_item, log_root->node); WARN_ON(root->log_root); root->log_root = log_root; btrfs_set_root_log_transid(root, 0); root->log_transid_committed = -1; btrfs_set_root_last_log_commit(root, 0); return 0; } static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root, struct btrfs_path *path, const struct btrfs_key *key) { struct btrfs_root *root; struct btrfs_tree_parent_check check = { 0 }; struct btrfs_fs_info *fs_info = tree_root->fs_info; u64 generation; int ret; int level; root = btrfs_alloc_root(fs_info, key->objectid, GFP_NOFS); if (!root) return ERR_PTR(-ENOMEM); ret = btrfs_find_root(tree_root, key, path, &root->root_item, &root->root_key); if (ret) { if (ret > 0) ret = -ENOENT; goto fail; } generation = btrfs_root_generation(&root->root_item); level = btrfs_root_level(&root->root_item); check.level = level; check.transid = generation; check.owner_root = key->objectid; root->node = read_tree_block(fs_info, btrfs_root_bytenr(&root->root_item), &check); if (IS_ERR(root->node)) { ret = PTR_ERR(root->node); root->node = NULL; goto fail; } if (unlikely(!btrfs_buffer_uptodate(root->node, generation, false))) { ret = -EIO; goto fail; } /* * For real fs, and not log/reloc trees, root owner must * match its root node owner */ if (unlikely(!btrfs_is_testing(fs_info) && btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID && btrfs_root_id(root) != BTRFS_TREE_RELOC_OBJECTID && btrfs_root_id(root) != btrfs_header_owner(root->node))) { btrfs_crit(fs_info, "root=%llu block=%llu, tree root owner mismatch, have %llu expect %llu", btrfs_root_id(root), root->node->start, btrfs_header_owner(root->node), btrfs_root_id(root)); ret = -EUCLEAN; goto fail; } root->commit_root = btrfs_root_node(root); return root; fail: btrfs_put_root(root); return ERR_PTR(ret); } struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, const struct btrfs_key *key) { struct btrfs_root *root; BTRFS_PATH_AUTO_FREE(path); path = btrfs_alloc_path(); if (!path) return ERR_PTR(-ENOMEM); root = read_tree_root_path(tree_root, path, 
key); return root; } /* * Initialize subvolume root in-memory structure. * * @anon_dev: anonymous device to attach to the root, if zero, allocate new * * In case of failure the caller is responsible to call btrfs_free_fs_root() */ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) { int ret; btrfs_drew_lock_init(&root->snapshot_lock); if (btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID && !btrfs_is_data_reloc_root(root) && btrfs_is_fstree(btrfs_root_id(root))) { set_bit(BTRFS_ROOT_SHAREABLE, &root->state); btrfs_check_and_init_root_item(&root->root_item); } /* * Don't assign anonymous block device to roots that are not exposed to * userspace, the id pool is limited to 1M */ if (btrfs_is_fstree(btrfs_root_id(root)) && btrfs_root_refs(&root->root_item) > 0) { if (!anon_dev) { ret = get_anon_bdev(&root->anon_dev); if (ret) return ret; } else { root->anon_dev = anon_dev; } } mutex_lock(&root->objectid_mutex); ret = btrfs_init_root_free_objectid(root); if (ret) { mutex_unlock(&root->objectid_mutex); return ret; } ASSERT(root->free_objectid <= BTRFS_LAST_FREE_OBJECTID); mutex_unlock(&root->objectid_mutex); return 0; } static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, u64 root_id) { struct btrfs_root *root; spin_lock(&fs_info->fs_roots_radix_lock); root = radix_tree_lookup(&fs_info->fs_roots_radix, (unsigned long)root_id); root = btrfs_grab_root(root); spin_unlock(&fs_info->fs_roots_radix_lock); return root; } static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info, u64 objectid) { struct btrfs_key key = { .objectid = objectid, .type = BTRFS_ROOT_ITEM_KEY, .offset = 0, }; switch (objectid) { case BTRFS_ROOT_TREE_OBJECTID: return btrfs_grab_root(fs_info->tree_root); case BTRFS_EXTENT_TREE_OBJECTID: return btrfs_grab_root(btrfs_global_root(fs_info, &key)); case BTRFS_CHUNK_TREE_OBJECTID: return btrfs_grab_root(fs_info->chunk_root); case BTRFS_DEV_TREE_OBJECTID: return btrfs_grab_root(fs_info->dev_root); case BTRFS_CSUM_TREE_OBJECTID: return btrfs_grab_root(btrfs_global_root(fs_info, &key)); case BTRFS_QUOTA_TREE_OBJECTID: return btrfs_grab_root(fs_info->quota_root); case BTRFS_UUID_TREE_OBJECTID: return btrfs_grab_root(fs_info->uuid_root); case BTRFS_BLOCK_GROUP_TREE_OBJECTID: return btrfs_grab_root(fs_info->block_group_root); case BTRFS_FREE_SPACE_TREE_OBJECTID: return btrfs_grab_root(btrfs_global_root(fs_info, &key)); case BTRFS_RAID_STRIPE_TREE_OBJECTID: return btrfs_grab_root(fs_info->stripe_root); default: return NULL; } } int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { int ret; ret = radix_tree_preload(GFP_NOFS); if (ret) return ret; spin_lock(&fs_info->fs_roots_radix_lock); ret = radix_tree_insert(&fs_info->fs_roots_radix, (unsigned long)btrfs_root_id(root), root); if (ret == 0) { btrfs_grab_root(root); set_bit(BTRFS_ROOT_IN_RADIX, &root->state); } spin_unlock(&fs_info->fs_roots_radix_lock); radix_tree_preload_end(); return ret; } void btrfs_check_leaked_roots(const struct btrfs_fs_info *fs_info) { #ifdef CONFIG_BTRFS_DEBUG struct btrfs_root *root; while (!list_empty(&fs_info->allocated_roots)) { char buf[BTRFS_ROOT_NAME_BUF_LEN]; root = list_first_entry(&fs_info->allocated_roots, struct btrfs_root, leak_list); btrfs_err(fs_info, "leaked root %s refcount %d", btrfs_root_name(&root->root_key, buf), refcount_read(&root->refs)); WARN_ON_ONCE(1); while (refcount_read(&root->refs) > 1) btrfs_put_root(root); btrfs_put_root(root); } #endif } static void free_global_roots(struct btrfs_fs_info 
*fs_info) { struct btrfs_root *root; struct rb_node *node; while ((node = rb_first_postorder(&fs_info->global_root_tree)) != NULL) { root = rb_entry(node, struct btrfs_root, rb_node); rb_erase(&root->rb_node, &fs_info->global_root_tree); btrfs_put_root(root); } } void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) { struct percpu_counter *em_counter = &fs_info->evictable_extent_maps; if (fs_info->fs_devices) btrfs_close_devices(fs_info->fs_devices); btrfs_free_compress_wsm(fs_info); percpu_counter_destroy(&fs_info->stats_read_blocks); percpu_counter_destroy(&fs_info->dirty_metadata_bytes); percpu_counter_destroy(&fs_info->delalloc_bytes); percpu_counter_destroy(&fs_info->ordered_bytes); if (percpu_counter_initialized(em_counter)) ASSERT(percpu_counter_sum_positive(em_counter) == 0); percpu_counter_destroy(em_counter); percpu_counter_destroy(&fs_info->dev_replace.bio_counter); btrfs_free_csum_hash(fs_info); btrfs_free_stripe_hash_table(fs_info); btrfs_free_ref_cache(fs_info); kfree(fs_info->balance_ctl); kfree(fs_info->delayed_root); free_global_roots(fs_info); btrfs_put_root(fs_info->tree_root); btrfs_put_root(fs_info->chunk_root); btrfs_put_root(fs_info->dev_root); btrfs_put_root(fs_info->quota_root); btrfs_put_root(fs_info->uuid_root); btrfs_put_root(fs_info->fs_root); btrfs_put_root(fs_info->data_reloc_root); btrfs_put_root(fs_info->block_group_root); btrfs_put_root(fs_info->stripe_root); btrfs_check_leaked_roots(fs_info); btrfs_extent_buffer_leak_debug_check(fs_info); kfree(fs_info->super_copy); kfree(fs_info->super_for_commit); kvfree(fs_info); } /* * Get an in-memory reference of a root structure. * * For essential trees like root/extent tree, we grab it from fs_info directly. * For subvolume trees, we check the cached filesystem roots first. If not * found, then read it from disk and add it to cached fs roots. * * Caller should release the root by calling btrfs_put_root() after the usage. * * NOTE: Reloc and log trees can't be read by this function as they share the * same root objectid. * * @objectid: root id * @anon_dev: preallocated anonymous block device number for new roots, * pass NULL for a new allocation. * @check_ref: whether to check root item references, If true, return -ENOENT * for orphan roots */ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info, u64 objectid, dev_t *anon_dev, bool check_ref) { struct btrfs_root *root; struct btrfs_path *path; struct btrfs_key key; int ret; root = btrfs_get_global_root(fs_info, objectid); if (root) return root; /* * If we're called for non-subvolume trees, and above function didn't * find one, do not try to read it from disk. * * This is namely for free-space-tree and quota tree, which can change * at runtime and should only be grabbed from fs_info. */ if (!btrfs_is_fstree(objectid) && objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) return ERR_PTR(-ENOENT); again: root = btrfs_lookup_fs_root(fs_info, objectid); if (root) { /* * Some other caller may have read out the newly inserted * subvolume already (for things like backref walk etc). Not * that common but still possible. In that case, we just need * to free the anon_dev. 
*/ if (unlikely(anon_dev && *anon_dev)) { free_anon_bdev(*anon_dev); *anon_dev = 0; } if (check_ref && btrfs_root_refs(&root->root_item) == 0) { btrfs_put_root(root); return ERR_PTR(-ENOENT); } return root; } key.objectid = objectid; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; root = btrfs_read_tree_root(fs_info->tree_root, &key); if (IS_ERR(root)) return root; if (check_ref && btrfs_root_refs(&root->root_item) == 0) { ret = -ENOENT; goto fail; } ret = btrfs_init_fs_root(root, anon_dev ? *anon_dev : 0); if (ret) goto fail; path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto fail; } key.objectid = BTRFS_ORPHAN_OBJECTID; key.type = BTRFS_ORPHAN_ITEM_KEY; key.offset = objectid; ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); btrfs_free_path(path); if (ret < 0) goto fail; if (ret == 0) set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); ret = btrfs_insert_fs_root(fs_info, root); if (ret) { if (ret == -EEXIST) { btrfs_put_root(root); goto again; } goto fail; } return root; fail: /* * If our caller provided us an anonymous device, then it's his * responsibility to free it in case we fail. So we have to set our * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root() * and once again by our caller. */ if (anon_dev && *anon_dev) root->anon_dev = 0; btrfs_put_root(root); return ERR_PTR(ret); } /* * Get in-memory reference of a root structure * * @objectid: tree objectid * @check_ref: if set, verify that the tree exists and the item has at least * one reference */ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref) { return btrfs_get_root_ref(fs_info, objectid, NULL, check_ref); } /* * Get in-memory reference of a root structure, created as new, optionally pass * the anonymous block device id * * @objectid: tree objectid * @anon_dev: if NULL, allocate a new anonymous block device or use the * parameter value if not NULL */ struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, dev_t *anon_dev) { return btrfs_get_root_ref(fs_info, objectid, anon_dev, true); } /* * Return a root for the given objectid. * * @fs_info: the fs_info * @objectid: the objectid we need to lookup * * This is exclusively used for backref walking, and exists specifically because * of how qgroups does lookups. Qgroups will do a backref lookup at delayed ref * creation time, which means we may have to read the tree_root in order to look * up a fs root that is not in memory. If the root is not in memory we will * read the tree root commit root and look up the fs root from there. This is a * temporary root, it will not be inserted into the radix tree as it doesn't * have the most uptodate information, it'll simply be discarded once the * backref code is finished using the root. */ struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 objectid) { struct btrfs_root *root; struct btrfs_key key; ASSERT(path->search_commit_root && path->skip_locking); /* * This can return -ENOENT if we ask for a root that doesn't exist, but * since this is called via the backref walking code we won't be looking * up a root that doesn't exist, unless there's corruption. So if root * != NULL just return it. 
*/ root = btrfs_get_global_root(fs_info, objectid); if (root) return root; root = btrfs_lookup_fs_root(fs_info, objectid); if (root) return root; key.objectid = objectid; key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; root = read_tree_root_path(fs_info->tree_root, path, &key); btrfs_release_path(path); return root; } static int cleaner_kthread(void *arg) { struct btrfs_fs_info *fs_info = arg; int again; while (1) { again = 0; set_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags); /* Make the cleaner go to sleep early. */ if (btrfs_need_cleaner_sleep(fs_info)) goto sleep; /* * Do not do anything if we might cause open_ctree() to block * before we have finished mounting the filesystem. */ if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags)) goto sleep; if (!mutex_trylock(&fs_info->cleaner_mutex)) goto sleep; /* * Avoid the problem that we change the status of the fs * during the above check and trylock. */ if (btrfs_need_cleaner_sleep(fs_info)) { mutex_unlock(&fs_info->cleaner_mutex); goto sleep; } if (test_and_clear_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags)) btrfs_sysfs_feature_update(fs_info); btrfs_run_delayed_iputs(fs_info); again = btrfs_clean_one_deleted_snapshot(fs_info); mutex_unlock(&fs_info->cleaner_mutex); /* * The defragger has dealt with the R/O remount and umount, * needn't do anything special here. */ btrfs_run_defrag_inodes(fs_info); /* * Acquires fs_info->reclaim_bgs_lock to avoid racing * with relocation (btrfs_relocate_chunk) and relocation * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group) * after acquiring fs_info->reclaim_bgs_lock. So we * can't hold, nor need to, fs_info->cleaner_mutex when deleting * unused block groups. */ btrfs_delete_unused_bgs(fs_info); /* * Reclaim block groups in the reclaim_bgs list after we deleted * all unused block_groups. This possibly gives us some more free * space. */ btrfs_reclaim_bgs(fs_info); sleep: clear_and_wake_up_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags); if (kthread_should_park()) kthread_parkme(); if (kthread_should_stop()) return 0; if (!again) { set_current_state(TASK_INTERRUPTIBLE); schedule(); __set_current_state(TASK_RUNNING); } } } static int transaction_kthread(void *arg) { struct btrfs_root *root = arg; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_trans_handle *trans; struct btrfs_transaction *cur; u64 transid; time64_t delta; unsigned long delay; bool cannot_commit; do { cannot_commit = false; delay = secs_to_jiffies(fs_info->commit_interval); mutex_lock(&fs_info->transaction_kthread_mutex); spin_lock(&fs_info->trans_lock); cur = fs_info->running_transaction; if (!cur) { spin_unlock(&fs_info->trans_lock); goto sleep; } delta = ktime_get_seconds() - cur->start_time; if (!test_and_clear_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags) && cur->state < TRANS_STATE_COMMIT_PREP && delta < fs_info->commit_interval) { spin_unlock(&fs_info->trans_lock); delay -= secs_to_jiffies(delta - 1); delay = min(delay, secs_to_jiffies(fs_info->commit_interval)); goto sleep; } transid = cur->transid; spin_unlock(&fs_info->trans_lock); /* If the file system is aborted, this will always fail. 
*/ trans = btrfs_attach_transaction(root); if (IS_ERR(trans)) { if (PTR_ERR(trans) != -ENOENT) cannot_commit = true; goto sleep; } if (transid == trans->transid) { btrfs_commit_transaction(trans); } else { btrfs_end_transaction(trans); } sleep: wake_up_process(fs_info->cleaner_kthread); mutex_unlock(&fs_info->transaction_kthread_mutex); if (BTRFS_FS_ERROR(fs_info)) btrfs_cleanup_transaction(fs_info); if (!kthread_should_stop() && (!btrfs_transaction_blocked(fs_info) || cannot_commit)) schedule_timeout_interruptible(delay); } while (!kthread_should_stop()); return 0; } /* * This will find the highest generation in the array of root backups. The * index of the highest array is returned, or -EINVAL if we can't find * anything. * * We check to make sure the array is valid by comparing the * generation of the latest root in the array with the generation * in the super block. If they don't match we pitch it. */ static int find_newest_super_backup(struct btrfs_fs_info *info) { const u64 newest_gen = btrfs_super_generation(info->super_copy); u64 cur; struct btrfs_root_backup *root_backup; int i; for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) { root_backup = info->super_copy->super_roots + i; cur = btrfs_backup_tree_root_gen(root_backup); if (cur == newest_gen) return i; } return -EINVAL; } /* * copy all the root pointers into the super backup array. * this will bump the backup pointer by one when it is * done */ static void backup_super_roots(struct btrfs_fs_info *info) { const int next_backup = info->backup_root_index; struct btrfs_root_backup *root_backup; root_backup = info->super_for_commit->super_roots + next_backup; /* * make sure all of our padding and empty slots get zero filled * regardless of which ones we use today */ memset(root_backup, 0, sizeof(*root_backup)); info->backup_root_index = (next_backup + 1) % BTRFS_NUM_BACKUP_ROOTS; btrfs_set_backup_tree_root(root_backup, info->tree_root->node->start); btrfs_set_backup_tree_root_gen(root_backup, btrfs_header_generation(info->tree_root->node)); btrfs_set_backup_tree_root_level(root_backup, btrfs_header_level(info->tree_root->node)); btrfs_set_backup_chunk_root(root_backup, info->chunk_root->node->start); btrfs_set_backup_chunk_root_gen(root_backup, btrfs_header_generation(info->chunk_root->node)); btrfs_set_backup_chunk_root_level(root_backup, btrfs_header_level(info->chunk_root->node)); if (!btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE)) { struct btrfs_root *extent_root = btrfs_extent_root(info, 0); struct btrfs_root *csum_root = btrfs_csum_root(info, 0); btrfs_set_backup_extent_root(root_backup, extent_root->node->start); btrfs_set_backup_extent_root_gen(root_backup, btrfs_header_generation(extent_root->node)); btrfs_set_backup_extent_root_level(root_backup, btrfs_header_level(extent_root->node)); btrfs_set_backup_csum_root(root_backup, csum_root->node->start); btrfs_set_backup_csum_root_gen(root_backup, btrfs_header_generation(csum_root->node)); btrfs_set_backup_csum_root_level(root_backup, btrfs_header_level(csum_root->node)); } /* * we might commit during log recovery, which happens before we set * the fs_root. Make sure it is valid before we fill it in. 
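	 *
	 * As an aside, the slot rotation done above is plain modular
	 * arithmetic.  A minimal sketch (next_slot() is an illustrative name,
	 * not a kernel helper):
	 *
	 *	static int next_slot(int cur, int nr_slots)
	 *	{
	 *		return (cur + 1) % nr_slots;
	 *	}
	 *
	 * With BTRFS_NUM_BACKUP_ROOTS slots the commits cycle through
	 * 0 -> 1 -> 2 -> ... -> 0, and find_newest_super_backup() later
	 * identifies the most recent slot by matching its recorded tree root
	 * generation against the generation stored in the super block.
	 *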
*/ if (info->fs_root && info->fs_root->node) { btrfs_set_backup_fs_root(root_backup, info->fs_root->node->start); btrfs_set_backup_fs_root_gen(root_backup, btrfs_header_generation(info->fs_root->node)); btrfs_set_backup_fs_root_level(root_backup, btrfs_header_level(info->fs_root->node)); } btrfs_set_backup_dev_root(root_backup, info->dev_root->node->start); btrfs_set_backup_dev_root_gen(root_backup, btrfs_header_generation(info->dev_root->node)); btrfs_set_backup_dev_root_level(root_backup, btrfs_header_level(info->dev_root->node)); btrfs_set_backup_total_bytes(root_backup, btrfs_super_total_bytes(info->super_copy)); btrfs_set_backup_bytes_used(root_backup, btrfs_super_bytes_used(info->super_copy)); btrfs_set_backup_num_devices(root_backup, btrfs_super_num_devices(info->super_copy)); /* * if we don't copy this out to the super_copy, it won't get remembered * for the next commit */ memcpy(&info->super_copy->super_roots, &info->super_for_commit->super_roots, sizeof(*root_backup) * BTRFS_NUM_BACKUP_ROOTS); } /* * Reads a backup root based on the passed priority. Prio 0 is the newest, prio * 1/2/3 are 2nd newest/3rd newest/4th (oldest) backup roots * * @fs_info: filesystem whose backup roots need to be read * @priority: priority of backup root required * * Returns backup root index on success and -EINVAL otherwise. */ static int read_backup_root(struct btrfs_fs_info *fs_info, u8 priority) { int backup_index = find_newest_super_backup(fs_info); struct btrfs_super_block *super = fs_info->super_copy; struct btrfs_root_backup *root_backup; if (priority < BTRFS_NUM_BACKUP_ROOTS && backup_index >= 0) { if (priority == 0) return backup_index; backup_index = backup_index + BTRFS_NUM_BACKUP_ROOTS - priority; backup_index %= BTRFS_NUM_BACKUP_ROOTS; } else { return -EINVAL; } root_backup = super->super_roots + backup_index; btrfs_set_super_generation(super, btrfs_backup_tree_root_gen(root_backup)); btrfs_set_super_root(super, btrfs_backup_tree_root(root_backup)); btrfs_set_super_root_level(super, btrfs_backup_tree_root_level(root_backup)); btrfs_set_super_bytes_used(super, btrfs_backup_bytes_used(root_backup)); /* * Fixme: the total bytes and num_devices need to match or we should * need a fsck */ btrfs_set_super_total_bytes(super, btrfs_backup_total_bytes(root_backup)); btrfs_set_super_num_devices(super, btrfs_backup_num_devices(root_backup)); return backup_index; } /* helper to cleanup workers */ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) { btrfs_destroy_workqueue(fs_info->fixup_workers); btrfs_destroy_workqueue(fs_info->delalloc_workers); btrfs_destroy_workqueue(fs_info->workers); if (fs_info->endio_workers) destroy_workqueue(fs_info->endio_workers); if (fs_info->rmw_workers) destroy_workqueue(fs_info->rmw_workers); btrfs_destroy_workqueue(fs_info->endio_write_workers); btrfs_destroy_workqueue(fs_info->endio_freespace_worker); btrfs_destroy_workqueue(fs_info->delayed_workers); btrfs_destroy_workqueue(fs_info->caching_workers); btrfs_destroy_workqueue(fs_info->flush_workers); btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); if (fs_info->discard_ctl.discard_workers) destroy_workqueue(fs_info->discard_ctl.discard_workers); /* * Now that all other work queues are destroyed, we can safely destroy * the queues used for metadata I/O, since tasks from those other work * queues can do metadata I/O operations. 
*/ if (fs_info->endio_meta_workers) destroy_workqueue(fs_info->endio_meta_workers); } static void free_root_extent_buffers(struct btrfs_root *root) { if (root) { free_extent_buffer(root->node); free_extent_buffer(root->commit_root); root->node = NULL; root->commit_root = NULL; } } static void free_global_root_pointers(struct btrfs_fs_info *fs_info) { struct btrfs_root *root, *tmp; rbtree_postorder_for_each_entry_safe(root, tmp, &fs_info->global_root_tree, rb_node) free_root_extent_buffers(root); } /* helper to cleanup tree roots */ static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root) { free_root_extent_buffers(info->tree_root); free_global_root_pointers(info); free_root_extent_buffers(info->dev_root); free_root_extent_buffers(info->quota_root); free_root_extent_buffers(info->uuid_root); free_root_extent_buffers(info->fs_root); free_root_extent_buffers(info->data_reloc_root); free_root_extent_buffers(info->block_group_root); free_root_extent_buffers(info->stripe_root); if (free_chunk_root) free_root_extent_buffers(info->chunk_root); } void btrfs_put_root(struct btrfs_root *root) { if (!root) return; if (refcount_dec_and_test(&root->refs)) { if (WARN_ON(!xa_empty(&root->inodes))) xa_destroy(&root->inodes); if (WARN_ON(!xa_empty(&root->delayed_nodes))) xa_destroy(&root->delayed_nodes); WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state)); if (root->anon_dev) free_anon_bdev(root->anon_dev); free_root_extent_buffers(root); #ifdef CONFIG_BTRFS_DEBUG spin_lock(&root->fs_info->fs_roots_radix_lock); list_del_init(&root->leak_list); spin_unlock(&root->fs_info->fs_roots_radix_lock); #endif kfree(root); } } void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info) { int ret; struct btrfs_root *gang[8]; int i; while (!list_empty(&fs_info->dead_roots)) { gang[0] = list_first_entry(&fs_info->dead_roots, struct btrfs_root, root_list); list_del(&gang[0]->root_list); if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state)) btrfs_drop_and_free_fs_root(fs_info, gang[0]); btrfs_put_root(gang[0]); } while (1) { ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, (void **)gang, 0, ARRAY_SIZE(gang)); if (!ret) break; for (i = 0; i < ret; i++) btrfs_drop_and_free_fs_root(fs_info, gang[i]); } } static void btrfs_init_scrub(struct btrfs_fs_info *fs_info) { mutex_init(&fs_info->scrub_lock); atomic_set(&fs_info->scrubs_running, 0); atomic_set(&fs_info->scrub_pause_req, 0); atomic_set(&fs_info->scrubs_paused, 0); atomic_set(&fs_info->scrub_cancel_req, 0); init_waitqueue_head(&fs_info->scrub_pause_wait); refcount_set(&fs_info->scrub_workers_refcnt, 0); } static void btrfs_init_balance(struct btrfs_fs_info *fs_info) { spin_lock_init(&fs_info->balance_lock); mutex_init(&fs_info->balance_mutex); atomic_set(&fs_info->balance_pause_req, 0); atomic_set(&fs_info->balance_cancel_req, 0); fs_info->balance_ctl = NULL; init_waitqueue_head(&fs_info->balance_wait_q); atomic_set(&fs_info->reloc_cancel_req, 0); } static int btrfs_init_btree_inode(struct super_block *sb) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); unsigned long hash = btrfs_inode_hash(BTRFS_BTREE_INODE_OBJECTID, fs_info->tree_root); struct inode *inode; inode = new_inode(sb); if (!inode) return -ENOMEM; btrfs_set_inode_number(BTRFS_I(inode), BTRFS_BTREE_INODE_OBJECTID); set_nlink(inode, 1); /* * we set the i_size on the btree inode to the max possible int. 
* the real end of the address space is determined by all of * the devices in the system */ inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &btree_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); btrfs_extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree, IO_TREE_BTREE_INODE_IO); btrfs_extent_map_tree_init(&BTRFS_I(inode)->extent_tree); BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root); set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags); __insert_inode_hash(inode, hash); set_bit(AS_KERNEL_FILE, &inode->i_mapping->flags); fs_info->btree_inode = inode; return 0; } static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info) { mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); init_rwsem(&fs_info->dev_replace.rwsem); init_waitqueue_head(&fs_info->dev_replace.replace_wait); } static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) { spin_lock_init(&fs_info->qgroup_lock); mutex_init(&fs_info->qgroup_ioctl_lock); fs_info->qgroup_tree = RB_ROOT; INIT_LIST_HEAD(&fs_info->dirty_qgroups); fs_info->qgroup_seq = 1; fs_info->qgroup_rescan_running = false; fs_info->qgroup_drop_subtree_thres = BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT; mutex_init(&fs_info->qgroup_rescan_lock); } static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) { u32 max_active = fs_info->thread_pool_size; unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND; unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU; fs_info->workers = btrfs_alloc_workqueue(fs_info, "worker", flags, max_active, 16); fs_info->delalloc_workers = btrfs_alloc_workqueue(fs_info, "delalloc", flags, max_active, 2); fs_info->flush_workers = btrfs_alloc_workqueue(fs_info, "flush_delalloc", flags, max_active, 0); fs_info->caching_workers = btrfs_alloc_workqueue(fs_info, "cache", flags, max_active, 0); fs_info->fixup_workers = btrfs_alloc_ordered_workqueue(fs_info, "fixup", ordered_flags); fs_info->endio_workers = alloc_workqueue("btrfs-endio", flags, max_active); fs_info->endio_meta_workers = alloc_workqueue("btrfs-endio-meta", flags, max_active); fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active); fs_info->endio_write_workers = btrfs_alloc_workqueue(fs_info, "endio-write", flags, max_active, 2); fs_info->endio_freespace_worker = btrfs_alloc_workqueue(fs_info, "freespace-write", flags, max_active, 0); fs_info->delayed_workers = btrfs_alloc_workqueue(fs_info, "delayed-meta", flags, max_active, 0); fs_info->qgroup_rescan_workers = btrfs_alloc_ordered_workqueue(fs_info, "qgroup-rescan", ordered_flags); fs_info->discard_ctl.discard_workers = alloc_ordered_workqueue("btrfs-discard", WQ_FREEZABLE); if (!(fs_info->workers && fs_info->delalloc_workers && fs_info->flush_workers && fs_info->endio_workers && fs_info->endio_meta_workers && fs_info->endio_write_workers && fs_info->endio_freespace_worker && fs_info->rmw_workers && fs_info->caching_workers && fs_info->fixup_workers && fs_info->delayed_workers && fs_info->qgroup_rescan_workers && fs_info->discard_ctl.discard_workers)) { return -ENOMEM; } return 0; } static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type) { struct crypto_shash *csum_shash; const char *csum_driver = btrfs_super_csum_driver(csum_type); csum_shash = crypto_alloc_shash(csum_driver, 0, 0); if (IS_ERR(csum_shash)) { btrfs_err(fs_info, "error allocating %s hash for checksum", csum_driver); return PTR_ERR(csum_shash); } fs_info->csum_shash = csum_shash; /* Check if the checksum implementation is a fast accelerated one. 
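	 *
	 * The switch below boils down to this predicate (sketch only, the
	 * helper name csum_impl_is_fast() is illustrative and does not exist
	 * in the kernel):
	 *
	 *	static bool csum_impl_is_fast(u16 type, bool crc32c_accelerated)
	 *	{
	 *		if (type == BTRFS_CSUM_TYPE_CRC32)
	 *			return crc32c_accelerated;
	 *		if (type == BTRFS_CSUM_TYPE_XXHASH)
	 *			return true;
	 *		return false;
	 *	}
	 *
	 * The resulting BTRFS_FS_CSUM_IMPL_FAST bit is only a performance
	 * hint for later decisions (e.g. whether checksumming is worth
	 * offloading); it does not change the on-disk checksum type.
	 *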
*/ switch (csum_type) { case BTRFS_CSUM_TYPE_CRC32: if (crc32_optimizations() & CRC32C_OPTIMIZATION) set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags); break; case BTRFS_CSUM_TYPE_XXHASH: set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags); break; default: break; } btrfs_info(fs_info, "using %s (%s) checksum algorithm", btrfs_super_csum_name(csum_type), crypto_shash_driver_name(csum_shash)); return 0; } static int btrfs_replay_log(struct btrfs_fs_info *fs_info, struct btrfs_fs_devices *fs_devices) { int ret; struct btrfs_tree_parent_check check = { 0 }; struct btrfs_root *log_tree_root; struct btrfs_super_block *disk_super = fs_info->super_copy; u64 bytenr = btrfs_super_log_root(disk_super); int level = btrfs_super_log_root_level(disk_super); if (unlikely(fs_devices->rw_devices == 0)) { btrfs_warn(fs_info, "log replay required on RO media"); return -EIO; } log_tree_root = btrfs_alloc_root(fs_info, BTRFS_TREE_LOG_OBJECTID, GFP_KERNEL); if (!log_tree_root) return -ENOMEM; check.level = level; check.transid = fs_info->generation + 1; check.owner_root = BTRFS_TREE_LOG_OBJECTID; log_tree_root->node = read_tree_block(fs_info, bytenr, &check); if (IS_ERR(log_tree_root->node)) { btrfs_warn(fs_info, "failed to read log tree"); ret = PTR_ERR(log_tree_root->node); log_tree_root->node = NULL; btrfs_put_root(log_tree_root); return ret; } if (unlikely(!extent_buffer_uptodate(log_tree_root->node))) { btrfs_err(fs_info, "failed to read log tree"); btrfs_put_root(log_tree_root); return -EIO; } /* returns with log_tree_root freed on success */ ret = btrfs_recover_log_trees(log_tree_root); btrfs_put_root(log_tree_root); if (ret) { btrfs_handle_fs_error(fs_info, ret, "Failed to recover log tree"); return ret; } if (sb_rdonly(fs_info->sb)) { ret = btrfs_commit_super(fs_info); if (ret) return ret; } return 0; } static int load_global_roots_objectid(struct btrfs_root *tree_root, struct btrfs_path *path, u64 objectid, const char *name) { struct btrfs_fs_info *fs_info = tree_root->fs_info; struct btrfs_root *root; u64 max_global_id = 0; int ret; struct btrfs_key key = { .objectid = objectid, .type = BTRFS_ROOT_ITEM_KEY, .offset = 0, }; bool found = false; /* If we have IGNOREDATACSUMS skip loading these roots. */ if (objectid == BTRFS_CSUM_TREE_OBJECTID && btrfs_test_opt(fs_info, IGNOREDATACSUMS)) { set_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state); return 0; } while (1) { ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); if (ret < 0) break; if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { ret = btrfs_next_leaf(tree_root, path); if (ret) { if (ret > 0) ret = 0; break; } } ret = 0; btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.objectid != objectid) break; btrfs_release_path(path); /* * Just worry about this for extent tree, it'll be the same for * everybody. */ if (objectid == BTRFS_EXTENT_TREE_OBJECTID) max_global_id = max(max_global_id, key.offset); found = true; root = read_tree_root_path(tree_root, path, &key); if (IS_ERR(root)) { ret = PTR_ERR(root); break; } set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); ret = btrfs_global_root_insert(root); if (ret) { btrfs_put_root(root); break; } key.offset++; } btrfs_release_path(path); if (objectid == BTRFS_EXTENT_TREE_OBJECTID) fs_info->nr_global_roots = max_global_id + 1; if (!found || ret) { if (objectid == BTRFS_CSUM_TREE_OBJECTID) set_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state); if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) ret = ret ? 
ret : -ENOENT; else ret = 0; btrfs_err(fs_info, "failed to load root %s", name); } return ret; } static int load_global_roots(struct btrfs_root *tree_root) { BTRFS_PATH_AUTO_FREE(path); int ret; path = btrfs_alloc_path(); if (!path) return -ENOMEM; ret = load_global_roots_objectid(tree_root, path, BTRFS_EXTENT_TREE_OBJECTID, "extent"); if (ret) return ret; ret = load_global_roots_objectid(tree_root, path, BTRFS_CSUM_TREE_OBJECTID, "csum"); if (ret) return ret; if (!btrfs_fs_compat_ro(tree_root->fs_info, FREE_SPACE_TREE)) return ret; ret = load_global_roots_objectid(tree_root, path, BTRFS_FREE_SPACE_TREE_OBJECTID, "free space"); return ret; } static int btrfs_read_roots(struct btrfs_fs_info *fs_info) { struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *root; struct btrfs_key location; int ret; ASSERT(fs_info->tree_root); ret = load_global_roots(tree_root); if (ret) return ret; location.type = BTRFS_ROOT_ITEM_KEY; location.offset = 0; if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) { location.objectid = BTRFS_BLOCK_GROUP_TREE_OBJECTID; root = btrfs_read_tree_root(tree_root, &location); if (IS_ERR(root)) { if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) { ret = PTR_ERR(root); goto out; } } else { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->block_group_root = root; } } location.objectid = BTRFS_DEV_TREE_OBJECTID; root = btrfs_read_tree_root(tree_root, &location); if (IS_ERR(root)) { if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) { ret = PTR_ERR(root); goto out; } } else { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->dev_root = root; } /* Initialize fs_info for all devices in any case */ ret = btrfs_init_devices_late(fs_info); if (ret) goto out; /* * This tree can share blocks with some other fs tree during relocation * and we need a proper setup by btrfs_get_fs_root */ root = btrfs_get_fs_root(tree_root->fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID, true); if (IS_ERR(root)) { if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) { ret = PTR_ERR(root); goto out; } } else { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->data_reloc_root = root; } location.objectid = BTRFS_QUOTA_TREE_OBJECTID; root = btrfs_read_tree_root(tree_root, &location); if (!IS_ERR(root)) { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->quota_root = root; } location.objectid = BTRFS_UUID_TREE_OBJECTID; root = btrfs_read_tree_root(tree_root, &location); if (IS_ERR(root)) { if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) { ret = PTR_ERR(root); if (ret != -ENOENT) goto out; } } else { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->uuid_root = root; } if (btrfs_fs_incompat(fs_info, RAID_STRIPE_TREE)) { location.objectid = BTRFS_RAID_STRIPE_TREE_OBJECTID; root = btrfs_read_tree_root(tree_root, &location); if (IS_ERR(root)) { if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) { ret = PTR_ERR(root); goto out; } } else { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->stripe_root = root; } } return 0; out: btrfs_warn(fs_info, "failed to read root (objectid=%llu): %d", location.objectid, ret); return ret; } static int validate_sys_chunk_array(const struct btrfs_fs_info *fs_info, const struct btrfs_super_block *sb) { unsigned int cur = 0; /* Offset inside the sys chunk array */ /* * At sb read time, fs_info is not fully initialized. Thus we have * to use super block sectorsize, which should have been validated. 
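	 *
	 * The sys_chunk_array itself is a packed sequence of (disk key, chunk)
	 * pairs, which is what the loop below walks (illustrative layout, not
	 * a struct defined anywhere):
	 *
	 *	offset 0:	struct btrfs_disk_key	key[0]
	 *			struct btrfs_chunk	chunk[0]   (length is
	 *				btrfs_chunk_item_size(num_stripes))
	 *	then:		struct btrfs_disk_key	key[1]
	 *			struct btrfs_chunk	chunk[1]
	 *	...		up to btrfs_super_sys_array_size(sb) bytes
	 *
	 * Every key must be a BTRFS_CHUNK_ITEM_KEY and every chunk must
	 * describe a SYSTEM block group; any entry that would run past
	 * sys_array_size is treated as a short read.
	 *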
*/ const u32 sectorsize = btrfs_super_sectorsize(sb); u32 sys_array_size = btrfs_super_sys_array_size(sb); if (unlikely(sys_array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)) { btrfs_err(fs_info, "system chunk array too big %u > %u", sys_array_size, BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); return -EUCLEAN; } while (cur < sys_array_size) { struct btrfs_disk_key *disk_key; struct btrfs_chunk *chunk; struct btrfs_key key; u64 type; u16 num_stripes; u32 len; int ret; disk_key = (struct btrfs_disk_key *)(sb->sys_chunk_array + cur); len = sizeof(*disk_key); if (unlikely(cur + len > sys_array_size)) goto short_read; cur += len; btrfs_disk_key_to_cpu(&key, disk_key); if (unlikely(key.type != BTRFS_CHUNK_ITEM_KEY)) { btrfs_err(fs_info, "unexpected item type %u in sys_array at offset %u", key.type, cur); return -EUCLEAN; } chunk = (struct btrfs_chunk *)(sb->sys_chunk_array + cur); num_stripes = btrfs_stack_chunk_num_stripes(chunk); if (unlikely(cur + btrfs_chunk_item_size(num_stripes) > sys_array_size)) goto short_read; type = btrfs_stack_chunk_type(chunk); if (unlikely(!(type & BTRFS_BLOCK_GROUP_SYSTEM))) { btrfs_err(fs_info, "invalid chunk type %llu in sys_array at offset %u", type, cur); return -EUCLEAN; } ret = btrfs_check_chunk_valid(fs_info, NULL, chunk, key.offset, sectorsize); if (ret < 0) return ret; cur += btrfs_chunk_item_size(num_stripes); } return 0; short_read: btrfs_err(fs_info, "super block sys chunk array short read, cur=%u sys_array_size=%u", cur, sys_array_size); return -EUCLEAN; } /* * Real super block validation * NOTE: super csum type and incompat features will not be checked here. * * @sb: super block to check * @mirror_num: the super block number to check its bytenr: * 0 the primary (1st) sb * 1, 2 2nd and 3rd backup copy * -1 skip bytenr check */ int btrfs_validate_super(const struct btrfs_fs_info *fs_info, const struct btrfs_super_block *sb, int mirror_num) { u64 nodesize = btrfs_super_nodesize(sb); u64 sectorsize = btrfs_super_sectorsize(sb); int ret = 0; const bool ignore_flags = btrfs_test_opt(fs_info, IGNORESUPERFLAGS); if (btrfs_super_magic(sb) != BTRFS_MAGIC) { btrfs_err(fs_info, "no valid FS found"); ret = -EINVAL; } if ((btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)) { if (!ignore_flags) { btrfs_err(fs_info, "unrecognized or unsupported super flag 0x%llx", btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP); ret = -EINVAL; } else { btrfs_info(fs_info, "unrecognized or unsupported super flags: 0x%llx, ignored", btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP); } } if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) { btrfs_err(fs_info, "tree_root level too big: %d >= %d", btrfs_super_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) { btrfs_err(fs_info, "chunk_root level too big: %d >= %d", btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) { btrfs_err(fs_info, "log_root level too big: %d >= %d", btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL); ret = -EINVAL; } /* * Check sectorsize and nodesize first, other check will need it. * Check all possible sectorsize(4K, 8K, 16K, 32K, 64K) here. 
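	 *
	 * Summarized, the geometry accepted by the checks below is (sketch
	 * that mirrors those checks):
	 *
	 *	valid = is_power_of_2(sectorsize) && is_power_of_2(nodesize) &&
	 *		sectorsize >= BTRFS_MIN_BLOCKSIZE &&
	 *		sectorsize <= BTRFS_MAX_METADATA_BLOCKSIZE &&
	 *		nodesize >= sectorsize &&
	 *		nodesize <= BTRFS_MAX_METADATA_BLOCKSIZE &&
	 *		nodesize == le32_to_cpu(sb->__unused_leafsize);
	 *
	 * plus a separate btrfs_supported_blocksize() check against the page
	 * size.  e.g. a 4K sectorsize with a 16K nodesize passes, while a
	 * nodesize smaller than the sectorsize is always rejected.
	 *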
*/ if (!is_power_of_2(sectorsize) || sectorsize < BTRFS_MIN_BLOCKSIZE || sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) { btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize); ret = -EINVAL; } if (!btrfs_supported_blocksize(sectorsize)) { btrfs_err(fs_info, "sectorsize %llu not yet supported for page size %lu", sectorsize, PAGE_SIZE); ret = -EINVAL; } if (!is_power_of_2(nodesize) || nodesize < sectorsize || nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) { btrfs_err(fs_info, "invalid nodesize %llu", nodesize); ret = -EINVAL; } if (nodesize != le32_to_cpu(sb->__unused_leafsize)) { btrfs_err(fs_info, "invalid leafsize %u, should be %llu", le32_to_cpu(sb->__unused_leafsize), nodesize); ret = -EINVAL; } /* Root alignment check */ if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) { btrfs_warn(fs_info, "tree_root block unaligned: %llu", btrfs_super_root(sb)); ret = -EINVAL; } if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) { btrfs_warn(fs_info, "chunk_root block unaligned: %llu", btrfs_super_chunk_root(sb)); ret = -EINVAL; } if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) { btrfs_warn(fs_info, "log_root block unaligned: %llu", btrfs_super_log_root(sb)); ret = -EINVAL; } if (!fs_info->fs_devices->temp_fsid && memcmp(fs_info->fs_devices->fsid, sb->fsid, BTRFS_FSID_SIZE) != 0) { btrfs_err(fs_info, "superblock fsid doesn't match fsid of fs_devices: %pU != %pU", sb->fsid, fs_info->fs_devices->fsid); ret = -EINVAL; } if (memcmp(fs_info->fs_devices->metadata_uuid, btrfs_sb_fsid_ptr(sb), BTRFS_FSID_SIZE) != 0) { btrfs_err(fs_info, "superblock metadata_uuid doesn't match metadata uuid of fs_devices: %pU != %pU", btrfs_sb_fsid_ptr(sb), fs_info->fs_devices->metadata_uuid); ret = -EINVAL; } if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) { btrfs_err(fs_info, "dev_item UUID does not match metadata fsid: %pU != %pU", fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid); ret = -EINVAL; } /* * Artificial requirement for block-group-tree to force newer features * (free-space-tree, no-holes) so the test matrix is smaller. 
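	 *
	 * Expressed as a predicate (sketch; the same dependency is enforced
	 * again against the mount options in btrfs_check_features()):
	 *
	 *	ok = !block_group_tree ||
	 *	     (free_space_tree_valid && no_holes);
	 *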
*/ if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE) && (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID) || !btrfs_fs_incompat(fs_info, NO_HOLES))) { btrfs_err(fs_info, "block-group-tree feature requires free-space-tree and no-holes"); ret = -EINVAL; } /* * Hint to catch really bogus numbers, bitflips or so, more exact checks are * done later */ if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) { btrfs_err(fs_info, "bytes_used is too small %llu", btrfs_super_bytes_used(sb)); ret = -EINVAL; } if (!is_power_of_2(btrfs_super_stripesize(sb))) { btrfs_err(fs_info, "invalid stripesize %u", btrfs_super_stripesize(sb)); ret = -EINVAL; } if (btrfs_super_num_devices(sb) > (1UL << 31)) btrfs_warn(fs_info, "suspicious number of devices: %llu", btrfs_super_num_devices(sb)); if (btrfs_super_num_devices(sb) == 0) { btrfs_err(fs_info, "number of devices is 0"); ret = -EINVAL; } if (mirror_num >= 0 && btrfs_super_bytenr(sb) != btrfs_sb_offset(mirror_num)) { btrfs_err(fs_info, "super offset mismatch %llu != %u", btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET); ret = -EINVAL; } if (ret) return ret; ret = validate_sys_chunk_array(fs_info, sb); /* * Obvious sys_chunk_array corruptions, it must hold at least one key * and one chunk */ if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) { btrfs_err(fs_info, "system chunk array too big %u > %u", btrfs_super_sys_array_size(sb), BTRFS_SYSTEM_CHUNK_ARRAY_SIZE); ret = -EINVAL; } if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) + sizeof(struct btrfs_chunk)) { btrfs_err(fs_info, "system chunk array too small %u < %zu", btrfs_super_sys_array_size(sb), sizeof(struct btrfs_disk_key) + sizeof(struct btrfs_chunk)); ret = -EINVAL; } /* * The generation is a global counter, we'll trust it more than the others * but it's still possible that it's the one that's wrong. */ if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb)) btrfs_warn(fs_info, "suspicious: generation < chunk_root_generation: %llu < %llu", btrfs_super_generation(sb), btrfs_super_chunk_root_generation(sb)); if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb) && btrfs_super_cache_generation(sb) != (u64)-1) btrfs_warn(fs_info, "suspicious: generation < cache_generation: %llu < %llu", btrfs_super_generation(sb), btrfs_super_cache_generation(sb)); return ret; } /* * Validation of super block at mount time. * Some checks already done early at mount time, like csum type and incompat * flags will be skipped. */ static int btrfs_validate_mount_super(struct btrfs_fs_info *fs_info) { return btrfs_validate_super(fs_info, fs_info->super_copy, 0); } /* * Validation of super block at write time. * Some checks like bytenr check will be skipped as their values will be * overwritten soon. * Extra checks like csum type and incompat flags will be done here. 
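 *
 * For comparison with the mount-time variant above, the two entry points
 * differ only in the mirror_num argument and the extra checks layered on
 * top here:
 *
 *	btrfs_validate_mount_super(): btrfs_validate_super(fs_info, super_copy, 0);
 *	btrfs_validate_write_super(): btrfs_validate_super(fs_info, sb, -1);
 *
 * Passing -1 skips the bytenr check, since that value is about to be
 * overwritten for each super block copy anyway.
 *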
*/ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info, struct btrfs_super_block *sb) { int ret; ret = btrfs_validate_super(fs_info, sb, -1); if (ret < 0) goto out; if (unlikely(!btrfs_supported_super_csum(btrfs_super_csum_type(sb)))) { ret = -EUCLEAN; btrfs_err(fs_info, "invalid csum type, has %u want %u", btrfs_super_csum_type(sb), BTRFS_CSUM_TYPE_CRC32); goto out; } if (unlikely(btrfs_super_incompat_flags(sb) & ~BTRFS_FEATURE_INCOMPAT_SUPP)) { ret = -EUCLEAN; btrfs_err(fs_info, "invalid incompat flags, has 0x%llx valid mask 0x%llx", btrfs_super_incompat_flags(sb), (unsigned long long)BTRFS_FEATURE_INCOMPAT_SUPP); goto out; } out: if (ret < 0) btrfs_err(fs_info, "super block corruption detected before writing it to disk"); return ret; } static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int level) { struct btrfs_tree_parent_check check = { .level = level, .transid = gen, .owner_root = btrfs_root_id(root) }; int ret = 0; root->node = read_tree_block(root->fs_info, bytenr, &check); if (IS_ERR(root->node)) { ret = PTR_ERR(root->node); root->node = NULL; return ret; } if (unlikely(!extent_buffer_uptodate(root->node))) { free_extent_buffer(root->node); root->node = NULL; return -EIO; } btrfs_set_root_node(&root->root_item, root->node); root->commit_root = btrfs_root_node(root); btrfs_set_root_refs(&root->root_item, 1); return ret; } static int load_important_roots(struct btrfs_fs_info *fs_info) { struct btrfs_super_block *sb = fs_info->super_copy; u64 gen, bytenr; int level, ret; bytenr = btrfs_super_root(sb); gen = btrfs_super_generation(sb); level = btrfs_super_root_level(sb); ret = load_super_root(fs_info->tree_root, bytenr, gen, level); if (ret) { btrfs_warn(fs_info, "couldn't read tree root"); return ret; } return 0; } static int __cold init_tree_roots(struct btrfs_fs_info *fs_info) { int backup_index = find_newest_super_backup(fs_info); struct btrfs_super_block *sb = fs_info->super_copy; struct btrfs_root *tree_root = fs_info->tree_root; bool handle_error = false; int ret = 0; int i; for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) { if (handle_error) { if (!IS_ERR(tree_root->node)) free_extent_buffer(tree_root->node); tree_root->node = NULL; if (!btrfs_test_opt(fs_info, USEBACKUPROOT)) break; free_root_pointers(fs_info, 0); /* * Don't use the log in recovery mode, it won't be * valid */ btrfs_set_super_log_root(sb, 0); btrfs_warn(fs_info, "try to load backup roots slot %d", i); ret = read_backup_root(fs_info, i); backup_index = ret; if (ret < 0) return ret; } ret = load_important_roots(fs_info); if (ret) { handle_error = true; continue; } /* * No need to hold btrfs_root::objectid_mutex since the fs * hasn't been fully initialised and we are the only user */ ret = btrfs_init_root_free_objectid(tree_root); if (ret < 0) { handle_error = true; continue; } ASSERT(tree_root->free_objectid <= BTRFS_LAST_FREE_OBJECTID); ret = btrfs_read_roots(fs_info); if (ret < 0) { handle_error = true; continue; } /* All successful */ fs_info->generation = btrfs_header_generation(tree_root->node); btrfs_set_last_trans_committed(fs_info, fs_info->generation); fs_info->last_reloc_trans = 0; /* Always begin writing backup roots after the one being used */ if (backup_index < 0) { fs_info->backup_root_index = 0; } else { fs_info->backup_root_index = backup_index + 1; fs_info->backup_root_index %= BTRFS_NUM_BACKUP_ROOTS; } break; } return ret; } /* * Lockdep gets confused between our buffer_tree which requires IRQ locking because * we modify marks in the IRQ context, and our 
delayed inode xarray which doesn't * have these requirements. Use a class key so lockdep doesn't get them mixed up. */ static struct lock_class_key buffer_xa_class; void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) { INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); /* Use the same flags as mapping->i_pages. */ xa_init_flags(&fs_info->buffer_tree, XA_FLAGS_LOCK_IRQ | XA_FLAGS_ACCOUNT); lockdep_set_class(&fs_info->buffer_tree.xa_lock, &buffer_xa_class); INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->delayed_iputs); INIT_LIST_HEAD(&fs_info->delalloc_roots); INIT_LIST_HEAD(&fs_info->caching_block_groups); spin_lock_init(&fs_info->delalloc_root_lock); spin_lock_init(&fs_info->trans_lock); spin_lock_init(&fs_info->fs_roots_radix_lock); spin_lock_init(&fs_info->delayed_iput_lock); spin_lock_init(&fs_info->defrag_inodes_lock); spin_lock_init(&fs_info->super_lock); spin_lock_init(&fs_info->unused_bgs_lock); spin_lock_init(&fs_info->treelog_bg_lock); spin_lock_init(&fs_info->zone_active_bgs_lock); spin_lock_init(&fs_info->relocation_bg_lock); rwlock_init(&fs_info->tree_mod_log_lock); rwlock_init(&fs_info->global_root_lock); mutex_init(&fs_info->unused_bg_unpin_mutex); mutex_init(&fs_info->reclaim_bgs_lock); mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); mutex_init(&fs_info->zoned_meta_io_lock); mutex_init(&fs_info->zoned_data_reloc_io_lock); seqlock_init(&fs_info->profiles_lock); btrfs_lockdep_init_map(fs_info, btrfs_trans_num_writers); btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters); btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered); btrfs_lockdep_init_map(fs_info, btrfs_ordered_extent); btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_prep, BTRFS_LOCKDEP_TRANS_COMMIT_PREP); btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked, BTRFS_LOCKDEP_TRANS_UNBLOCKED); btrfs_state_lockdep_init_map(fs_info, btrfs_trans_super_committed, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED); btrfs_state_lockdep_init_map(fs_info, btrfs_trans_completed, BTRFS_LOCKDEP_TRANS_COMPLETED); INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->space_info); INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); INIT_LIST_HEAD(&fs_info->unused_bgs); INIT_LIST_HEAD(&fs_info->reclaim_bgs); INIT_LIST_HEAD(&fs_info->zone_active_bgs); #ifdef CONFIG_BTRFS_DEBUG INIT_LIST_HEAD(&fs_info->allocated_roots); INIT_LIST_HEAD(&fs_info->allocated_ebs); spin_lock_init(&fs_info->eb_leak_lock); #endif fs_info->mapping_tree = RB_ROOT_CACHED; rwlock_init(&fs_info->mapping_tree_lock); btrfs_init_block_rsv(&fs_info->global_block_rsv, BTRFS_BLOCK_RSV_GLOBAL); btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS); btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK); btrfs_init_block_rsv(&fs_info->treelog_rsv, BTRFS_BLOCK_RSV_TREELOG); btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY); btrfs_init_block_rsv(&fs_info->delayed_block_rsv, BTRFS_BLOCK_RSV_DELOPS); btrfs_init_block_rsv(&fs_info->delayed_refs_rsv, BTRFS_BLOCK_RSV_DELREFS); atomic_set(&fs_info->async_delalloc_pages, 0); atomic_set(&fs_info->defrag_running, 0); atomic_set(&fs_info->nr_delayed_iputs, 0); atomic64_set(&fs_info->tree_mod_seq, 0); fs_info->global_root_tree = RB_ROOT; fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; fs_info->metadata_ratio = 0; fs_info->defrag_inodes = RB_ROOT; atomic64_set(&fs_info->free_chunk_space, 0); fs_info->tree_mod_log = RB_ROOT; fs_info->commit_interval = 
BTRFS_DEFAULT_COMMIT_INTERVAL; btrfs_init_ref_verify(fs_info); fs_info->thread_pool_size = min_t(unsigned long, num_online_cpus() + 2, 8); INIT_LIST_HEAD(&fs_info->ordered_roots); spin_lock_init(&fs_info->ordered_root_lock); btrfs_init_scrub(fs_info); btrfs_init_balance(fs_info); btrfs_init_async_reclaim_work(fs_info); btrfs_init_extent_map_shrinker_work(fs_info); rwlock_init(&fs_info->block_group_cache_lock); fs_info->block_group_cache_tree = RB_ROOT_CACHED; btrfs_extent_io_tree_init(fs_info, &fs_info->excluded_extents, IO_TREE_FS_EXCLUDED_EXTENTS); mutex_init(&fs_info->ordered_operations_mutex); mutex_init(&fs_info->tree_log_mutex); mutex_init(&fs_info->chunk_mutex); mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->ro_block_group_mutex); init_rwsem(&fs_info->commit_root_sem); init_rwsem(&fs_info->cleanup_work_sem); init_rwsem(&fs_info->subvol_sem); sema_init(&fs_info->uuid_tree_rescan_sem, 1); btrfs_init_dev_replace_locks(fs_info); btrfs_init_qgroup(fs_info); btrfs_discard_init(fs_info); btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); btrfs_init_free_cluster(&fs_info->data_alloc_cluster); init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_wait); init_waitqueue_head(&fs_info->transaction_blocked_wait); init_waitqueue_head(&fs_info->async_submit_wait); init_waitqueue_head(&fs_info->delayed_iputs_wait); /* Usable values until the real ones are cached from the superblock */ fs_info->nodesize = 4096; fs_info->sectorsize = 4096; fs_info->sectorsize_bits = ilog2(4096); fs_info->stripesize = 4096; /* Default compress algorithm when user does -o compress */ fs_info->compress_type = BTRFS_COMPRESS_ZLIB; fs_info->max_extent_size = BTRFS_MAX_EXTENT_SIZE; spin_lock_init(&fs_info->swapfile_pins_lock); fs_info->swapfile_pins = RB_ROOT; fs_info->bg_reclaim_threshold = BTRFS_DEFAULT_RECLAIM_THRESH; INIT_WORK(&fs_info->reclaim_bgs_work, btrfs_reclaim_bgs_work); } static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block *sb) { int ret; fs_info->sb = sb; /* Temporary fixed values for block size until we read the superblock. */ sb->s_blocksize = BTRFS_BDEV_BLOCKSIZE; sb->s_blocksize_bits = blksize_bits(BTRFS_BDEV_BLOCKSIZE); ret = percpu_counter_init(&fs_info->ordered_bytes, 0, GFP_KERNEL); if (ret) return ret; ret = percpu_counter_init(&fs_info->evictable_extent_maps, 0, GFP_KERNEL); if (ret) return ret; ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); if (ret) return ret; ret = percpu_counter_init(&fs_info->stats_read_blocks, 0, GFP_KERNEL); if (ret) return ret; fs_info->dirty_metadata_batch = PAGE_SIZE * (1 + ilog2(nr_cpu_ids)); ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL); if (ret) return ret; ret = percpu_counter_init(&fs_info->dev_replace.bio_counter, 0, GFP_KERNEL); if (ret) return ret; fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), GFP_KERNEL); if (!fs_info->delayed_root) return -ENOMEM; btrfs_init_delayed_root(fs_info->delayed_root); if (sb_rdonly(sb)) set_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state); if (btrfs_test_opt(fs_info, IGNOREMETACSUMS)) set_bit(BTRFS_FS_STATE_SKIP_META_CSUMS, &fs_info->fs_state); return btrfs_alloc_stripe_hash_table(fs_info); } static int btrfs_uuid_rescan_kthread(void *data) { struct btrfs_fs_info *fs_info = data; int ret; /* * 1st step is to iterate through the existing UUID tree and * to delete all entries that contain outdated data. 
* 2nd step is to add all missing entries to the UUID tree. */ ret = btrfs_uuid_tree_iterate(fs_info); if (ret < 0) { if (ret != -EINTR) btrfs_warn(fs_info, "iterating uuid_tree failed %d", ret); up(&fs_info->uuid_tree_rescan_sem); return ret; } return btrfs_uuid_scan_kthread(data); } static int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info) { struct task_struct *task; down(&fs_info->uuid_tree_rescan_sem); task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid"); if (IS_ERR(task)) { /* fs_info->update_uuid_tree_gen remains 0 in all error case */ btrfs_warn(fs_info, "failed to start uuid_rescan task"); up(&fs_info->uuid_tree_rescan_sem); return PTR_ERR(task); } return 0; } static int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) { u64 root_objectid = 0; struct btrfs_root *gang[8]; int ret = 0; while (1) { unsigned int found; spin_lock(&fs_info->fs_roots_radix_lock); found = radix_tree_gang_lookup(&fs_info->fs_roots_radix, (void **)gang, root_objectid, ARRAY_SIZE(gang)); if (!found) { spin_unlock(&fs_info->fs_roots_radix_lock); break; } root_objectid = btrfs_root_id(gang[found - 1]) + 1; for (int i = 0; i < found; i++) { /* Avoid to grab roots in dead_roots. */ if (btrfs_root_refs(&gang[i]->root_item) == 0) { gang[i] = NULL; continue; } /* Grab all the search result for later use. */ gang[i] = btrfs_grab_root(gang[i]); } spin_unlock(&fs_info->fs_roots_radix_lock); for (int i = 0; i < found; i++) { if (!gang[i]) continue; root_objectid = btrfs_root_id(gang[i]); /* * Continue to release the remaining roots after the first * error without cleanup and preserve the first error * for the return. */ if (!ret) ret = btrfs_orphan_cleanup(gang[i]); btrfs_put_root(gang[i]); } if (ret) break; root_objectid++; } return ret; } /* * Mounting logic specific to read-write file systems. Shared by open_ctree * and btrfs_remount when remounting from read-only to read-write. */ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info) { int ret; const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE); bool rebuild_free_space_tree = false; if (btrfs_test_opt(fs_info, CLEAR_CACHE) && btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) btrfs_warn(fs_info, "'clear_cache' option is ignored with extent tree v2"); else rebuild_free_space_tree = true; } else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) { btrfs_warn(fs_info, "free space tree is invalid"); rebuild_free_space_tree = true; } if (rebuild_free_space_tree) { btrfs_info(fs_info, "rebuilding free space tree"); ret = btrfs_rebuild_free_space_tree(fs_info); if (ret) { btrfs_warn(fs_info, "failed to rebuild free space tree: %d", ret); goto out; } } if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && !btrfs_test_opt(fs_info, FREE_SPACE_TREE)) { btrfs_info(fs_info, "disabling free space tree"); ret = btrfs_delete_free_space_tree(fs_info); if (ret) { btrfs_warn(fs_info, "failed to disable free space tree: %d", ret); goto out; } } /* * btrfs_find_orphan_roots() is responsible for finding all the dead * roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load * them into the fs_info->fs_roots_radix tree. This must be done before * calling btrfs_orphan_cleanup() on the tree root. 
	 * If we don't do it first, then btrfs_orphan_cleanup() will delete a
	 * dead root's orphan item before the root's tree is deleted - this
	 * means that if we unmount or crash before the deletion completes, on
	 * the next mount we will not delete what remains of the tree because
	 * the orphan item does not exist anymore, which is what tells us we
	 * have a pending deletion.
	 */
	ret = btrfs_find_orphan_roots(fs_info);
	if (ret)
		goto out;

	ret = btrfs_cleanup_fs_roots(fs_info);
	if (ret)
		goto out;

	down_read(&fs_info->cleanup_work_sem);
	if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
	    (ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
		up_read(&fs_info->cleanup_work_sem);
		goto out;
	}
	up_read(&fs_info->cleanup_work_sem);

	mutex_lock(&fs_info->cleaner_mutex);
	ret = btrfs_recover_relocation(fs_info);
	mutex_unlock(&fs_info->cleaner_mutex);
	if (ret < 0) {
		btrfs_warn(fs_info, "failed to recover relocation: %d", ret);
		goto out;
	}

	if (btrfs_test_opt(fs_info, FREE_SPACE_TREE) &&
	    !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
		btrfs_info(fs_info, "creating free space tree");
		ret = btrfs_create_free_space_tree(fs_info);
		if (ret) {
			btrfs_warn(fs_info,
				   "failed to create free space tree: %d", ret);
			goto out;
		}
	}

	if (cache_opt != btrfs_free_space_cache_v1_active(fs_info)) {
		ret = btrfs_set_free_space_cache_v1_active(fs_info, cache_opt);
		if (ret)
			goto out;
	}

	ret = btrfs_resume_balance_async(fs_info);
	if (ret)
		goto out;

	ret = btrfs_resume_dev_replace_async(fs_info);
	if (ret) {
		btrfs_warn(fs_info, "failed to resume dev_replace");
		goto out;
	}

	btrfs_qgroup_rescan_resume(fs_info);

	if (!fs_info->uuid_root) {
		btrfs_info(fs_info, "creating UUID tree");
		ret = btrfs_create_uuid_tree(fs_info);
		if (ret) {
			btrfs_warn(fs_info,
				   "failed to create the UUID tree %d", ret);
			goto out;
		}
	}

out:
	return ret;
}

/*
 * Do various sanity and dependency checks of different features.
 *
 * @is_rw_mount:	If the mount is read-write.
 *
 * This is the place for less strict checks (like for subpage or artificial
 * feature dependencies).
 *
 * For strict checks or possible corruption detection, see
 * btrfs_validate_super().
 *
 * This should be called after btrfs_parse_options(), as some mount options
 * (space cache related) can modify on-disk format like free space tree and
 * screw up certain feature dependencies.
 */
int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
{
	struct btrfs_super_block *disk_super = fs_info->super_copy;
	u64 incompat = btrfs_super_incompat_flags(disk_super);
	const u64 compat_ro = btrfs_super_compat_ro_flags(disk_super);
	const u64 compat_ro_unsupp = (compat_ro & ~BTRFS_FEATURE_COMPAT_RO_SUPP);

	if (incompat & ~BTRFS_FEATURE_INCOMPAT_SUPP) {
		btrfs_err(fs_info,
			  "cannot mount because of unknown incompat features (0x%llx)",
			  incompat);
		return -EINVAL;
	}

	/* Runtime limitation for mixed block groups. */
	if ((incompat & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
	    (fs_info->sectorsize != fs_info->nodesize)) {
		btrfs_err(fs_info,
			  "unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups",
			  fs_info->nodesize, fs_info->sectorsize);
		return -EINVAL;
	}

	/* Mixed backref is an always-enabled feature. */
	incompat |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;

	/* Set compression related flags just in case. */
	if (fs_info->compress_type == BTRFS_COMPRESS_LZO)
		incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
	else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD)
		incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD;

	/*
	 * An ancient flag, which should really be marked deprecated.
* Such runtime limitation doesn't really need a incompat flag. */ if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) incompat |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; if (compat_ro_unsupp && is_rw_mount) { btrfs_err(fs_info, "cannot mount read-write because of unknown compat_ro features (0x%llx)", compat_ro); return -EINVAL; } /* * We have unsupported RO compat features, although RO mounted, we * should not cause any metadata writes, including log replay. * Or we could screw up whatever the new feature requires. */ if (compat_ro_unsupp && btrfs_super_log_root(disk_super) && !btrfs_test_opt(fs_info, NOLOGREPLAY)) { btrfs_err(fs_info, "cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay", compat_ro); return -EINVAL; } /* * Artificial limitations for block group tree, to force * block-group-tree to rely on no-holes and free-space-tree. */ if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE) && (!btrfs_fs_incompat(fs_info, NO_HOLES) || !btrfs_test_opt(fs_info, FREE_SPACE_TREE))) { btrfs_err(fs_info, "block-group-tree feature requires no-holes and free-space-tree features"); return -EINVAL; } /* * Subpage/bs > ps runtime limitation on v1 cache. * * V1 space cache still has some hard coded PAGE_SIZE usage, while * we're already defaulting to v2 cache, no need to bother v1 as it's * going to be deprecated anyway. */ if (fs_info->sectorsize != PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) { btrfs_warn(fs_info, "v1 space cache is not supported for page size %lu with sectorsize %u", PAGE_SIZE, fs_info->sectorsize); return -EINVAL; } /* This can be called by remount, we need to protect the super block. */ spin_lock(&fs_info->super_lock); btrfs_set_super_incompat_flags(disk_super, incompat); spin_unlock(&fs_info->super_lock); return 0; } int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices) { u32 sectorsize; u32 nodesize; u32 stripesize; u64 generation; u16 csum_type; struct btrfs_super_block *disk_super; struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_root *tree_root; struct btrfs_root *chunk_root; int ret; int level; ret = init_mount_fs_info(fs_info, sb); if (ret) goto fail; /* These need to be init'ed before we start creating inodes and such. */ tree_root = btrfs_alloc_root(fs_info, BTRFS_ROOT_TREE_OBJECTID, GFP_KERNEL); fs_info->tree_root = tree_root; chunk_root = btrfs_alloc_root(fs_info, BTRFS_CHUNK_TREE_OBJECTID, GFP_KERNEL); fs_info->chunk_root = chunk_root; if (!tree_root || !chunk_root) { ret = -ENOMEM; goto fail; } ret = btrfs_init_btree_inode(sb); if (ret) goto fail; invalidate_bdev(fs_devices->latest_dev->bdev); /* * Read super block and check the signature bytes only */ disk_super = btrfs_read_disk_super(fs_devices->latest_dev->bdev, 0, false); if (IS_ERR(disk_super)) { ret = PTR_ERR(disk_super); goto fail_alloc; } btrfs_info(fs_info, "first mount of filesystem %pU", disk_super->fsid); /* * Verify the type first, if that or the checksum value are * corrupted, we'll find out */ csum_type = btrfs_super_csum_type(disk_super); if (!btrfs_supported_super_csum(csum_type)) { btrfs_err(fs_info, "unsupported checksum algorithm: %u", csum_type); ret = -EINVAL; btrfs_release_disk_super(disk_super); goto fail_alloc; } fs_info->csum_size = btrfs_super_csum_size(disk_super); ret = btrfs_init_csum_hash(fs_info, csum_type); if (ret) { btrfs_release_disk_super(disk_super); goto fail_alloc; } /* * We want to check superblock checksum, the type is stored inside. * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). 
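	 *
	 * The expected layout, consistent with how write_dev_supers()
	 * computes the digest later in this file:
	 *
	 *	bytes [0, BTRFS_CSUM_SIZE)			stored checksum
	 *	bytes [BTRFS_CSUM_SIZE, BTRFS_SUPER_INFO_SIZE)	checksummed payload
	 *
	 * so verification is roughly (sketch, not the actual helper body):
	 *
	 *	crypto_shash_digest(shash, (const char *)disk_super + BTRFS_CSUM_SIZE,
	 *			    BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, result);
	 *	ok = (memcmp(result, disk_super->csum, fs_info->csum_size) == 0);
	 *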
*/ if (btrfs_check_super_csum(fs_info, disk_super)) { btrfs_err(fs_info, "superblock checksum mismatch"); ret = -EINVAL; btrfs_release_disk_super(disk_super); goto fail_alloc; } /* * super_copy is zeroed at allocation time and we never touch the * following bytes up to INFO_SIZE, the checksum is calculated from * the whole block of INFO_SIZE */ memcpy(fs_info->super_copy, disk_super, sizeof(*fs_info->super_copy)); btrfs_release_disk_super(disk_super); disk_super = fs_info->super_copy; memcpy(fs_info->super_for_commit, fs_info->super_copy, sizeof(*fs_info->super_for_commit)); ret = btrfs_validate_mount_super(fs_info); if (ret) { btrfs_err(fs_info, "superblock contains fatal errors"); ret = -EINVAL; goto fail_alloc; } if (!btrfs_super_root(disk_super)) { btrfs_err(fs_info, "invalid superblock tree root bytenr"); ret = -EINVAL; goto fail_alloc; } /* check FS state, whether FS is broken. */ if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR) WRITE_ONCE(fs_info->fs_error, -EUCLEAN); /* Set up fs_info before parsing mount options */ nodesize = btrfs_super_nodesize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); stripesize = sectorsize; fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids)); fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); fs_info->nodesize = nodesize; fs_info->nodesize_bits = ilog2(nodesize); fs_info->sectorsize = sectorsize; fs_info->sectorsize_bits = ilog2(sectorsize); fs_info->block_min_order = ilog2(round_up(sectorsize, PAGE_SIZE) >> PAGE_SHIFT); fs_info->block_max_order = ilog2((BITS_PER_LONG << fs_info->sectorsize_bits) >> PAGE_SHIFT); fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size; fs_info->stripesize = stripesize; fs_info->fs_devices->fs_info = fs_info; if (fs_info->sectorsize > PAGE_SIZE) btrfs_warn(fs_info, "support for block size %u with page size %lu is experimental, some features may be missing", fs_info->sectorsize, PAGE_SIZE); /* * Handle the space caching options appropriately now that we have the * super block loaded and validated. */ btrfs_set_free_space_cache_settings(fs_info); if (!btrfs_check_options(fs_info, &fs_info->mount_opt, sb->s_flags)) { ret = -EINVAL; goto fail_alloc; } ret = btrfs_check_features(fs_info, !sb_rdonly(sb)); if (ret < 0) goto fail_alloc; /* * At this point our mount options are validated, if we set ->max_inline * to something non-standard make sure we truncate it to sectorsize. */ fs_info->max_inline = min_t(u64, fs_info->max_inline, fs_info->sectorsize); ret = btrfs_alloc_compress_wsm(fs_info); if (ret) goto fail_sb_buffer; ret = btrfs_init_workqueues(fs_info); if (ret) goto fail_sb_buffer; sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super); sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE); /* Update the values for the current filesystem. 
*/ sb->s_blocksize = sectorsize; sb->s_blocksize_bits = blksize_bits(sectorsize); memcpy(&sb->s_uuid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE); mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_sys_array(fs_info); mutex_unlock(&fs_info->chunk_mutex); if (ret) { btrfs_err(fs_info, "failed to read the system array: %d", ret); goto fail_sb_buffer; } generation = btrfs_super_chunk_root_generation(disk_super); level = btrfs_super_chunk_root_level(disk_super); ret = load_super_root(chunk_root, btrfs_super_chunk_root(disk_super), generation, level); if (ret) { btrfs_err(fs_info, "failed to read chunk root"); goto fail_tree_roots; } read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, offsetof(struct btrfs_header, chunk_tree_uuid), BTRFS_UUID_SIZE); ret = btrfs_read_chunk_tree(fs_info); if (ret) { btrfs_err(fs_info, "failed to read chunk tree: %d", ret); goto fail_tree_roots; } /* * At this point we know all the devices that make this filesystem, * including the seed devices but we don't know yet if the replace * target is required. So free devices that are not part of this * filesystem but skip the replace target device which is checked * below in btrfs_init_dev_replace(). */ btrfs_free_extra_devids(fs_devices); if (unlikely(!fs_devices->latest_dev->bdev)) { btrfs_err(fs_info, "failed to read devices"); ret = -EIO; goto fail_tree_roots; } ret = init_tree_roots(fs_info); if (ret) goto fail_tree_roots; /* * Get zone type information of zoned block devices. This will also * handle emulation of a zoned filesystem if a regular device has the * zoned incompat feature flag set. */ ret = btrfs_get_dev_zone_info_all_devices(fs_info); if (ret) { btrfs_err(fs_info, "zoned: failed to read device zone info: %d", ret); goto fail_block_groups; } /* * If we have a uuid root and we're not being told to rescan we need to * check the generation here so we can set the * BTRFS_FS_UPDATE_UUID_TREE_GEN bit. Otherwise we could commit the * transaction during a balance or the log replay without updating the * uuid generation, and then if we crash we would rescan the uuid tree, * even though it was perfectly fine. 
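	 *
	 * In short (this mirrors the condition right below and the later
	 * check before btrfs_check_uuid_tree()):
	 *
	 *	trust_uuid_tree = fs_info->uuid_root &&
	 *			  !btrfs_test_opt(fs_info, RESCAN_UUID_TREE) &&
	 *			  fs_info->generation ==
	 *				btrfs_super_uuid_tree_generation(disk_super);
	 *
	 * When it holds, the generation-tracking bit is set and no rescan
	 * thread is started; otherwise, on a read-write mount, the UUID tree
	 * is either created (if missing) or rescanned further down.
	 *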
*/ if (fs_info->uuid_root && !btrfs_test_opt(fs_info, RESCAN_UUID_TREE) && fs_info->generation == btrfs_super_uuid_tree_generation(disk_super)) set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags); ret = btrfs_verify_dev_extents(fs_info); if (ret) { btrfs_err(fs_info, "failed to verify dev extents against chunks: %d", ret); goto fail_block_groups; } ret = btrfs_recover_balance(fs_info); if (ret) { btrfs_err(fs_info, "failed to recover balance: %d", ret); goto fail_block_groups; } ret = btrfs_init_dev_stats(fs_info); if (ret) { btrfs_err(fs_info, "failed to init dev_stats: %d", ret); goto fail_block_groups; } ret = btrfs_init_dev_replace(fs_info); if (ret) { btrfs_err(fs_info, "failed to init dev_replace: %d", ret); goto fail_block_groups; } ret = btrfs_check_zoned_mode(fs_info); if (ret) { btrfs_err(fs_info, "failed to initialize zoned mode: %d", ret); goto fail_block_groups; } ret = btrfs_sysfs_add_fsid(fs_devices); if (ret) { btrfs_err(fs_info, "failed to init sysfs fsid interface: %d", ret); goto fail_block_groups; } ret = btrfs_sysfs_add_mounted(fs_info); if (ret) { btrfs_err(fs_info, "failed to init sysfs interface: %d", ret); goto fail_fsdev_sysfs; } ret = btrfs_init_space_info(fs_info); if (ret) { btrfs_err(fs_info, "failed to initialize space info: %d", ret); goto fail_sysfs; } ret = btrfs_read_block_groups(fs_info); if (ret) { btrfs_err(fs_info, "failed to read block groups: %d", ret); goto fail_sysfs; } btrfs_zoned_reserve_data_reloc_bg(fs_info); btrfs_free_zone_cache(fs_info); btrfs_check_active_zone_reservation(fs_info); if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices && !btrfs_check_rw_degradable(fs_info, NULL)) { btrfs_warn(fs_info, "writable mount is not allowed due to too many missing devices"); ret = -EINVAL; goto fail_sysfs; } fs_info->cleaner_kthread = kthread_run(cleaner_kthread, fs_info, "btrfs-cleaner"); if (IS_ERR(fs_info->cleaner_kthread)) { ret = PTR_ERR(fs_info->cleaner_kthread); goto fail_sysfs; } fs_info->transaction_kthread = kthread_run(transaction_kthread, tree_root, "btrfs-transaction"); if (IS_ERR(fs_info->transaction_kthread)) { ret = PTR_ERR(fs_info->transaction_kthread); goto fail_cleaner; } ret = btrfs_read_qgroup_config(fs_info); if (ret) goto fail_trans_kthread; if (btrfs_build_ref_tree(fs_info)) btrfs_err(fs_info, "couldn't build ref tree"); /* do not make disk changes in broken FS or nologreplay is given */ if (btrfs_super_log_root(disk_super) != 0 && !btrfs_test_opt(fs_info, NOLOGREPLAY)) { btrfs_info(fs_info, "start tree-log replay"); ret = btrfs_replay_log(fs_info, fs_devices); if (ret) goto fail_qgroup; } fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true); if (IS_ERR(fs_info->fs_root)) { ret = PTR_ERR(fs_info->fs_root); btrfs_warn(fs_info, "failed to read fs tree: %d", ret); fs_info->fs_root = NULL; goto fail_qgroup; } if (sb_rdonly(sb)) return 0; ret = btrfs_start_pre_rw_mount(fs_info); if (ret) { close_ctree(fs_info); return ret; } btrfs_discard_resume(fs_info); if (fs_info->uuid_root && (btrfs_test_opt(fs_info, RESCAN_UUID_TREE) || fs_info->generation != btrfs_super_uuid_tree_generation(disk_super))) { btrfs_info(fs_info, "checking UUID tree"); ret = btrfs_check_uuid_tree(fs_info); if (ret) { btrfs_warn(fs_info, "failed to check the UUID tree: %d", ret); close_ctree(fs_info); return ret; } } set_bit(BTRFS_FS_OPEN, &fs_info->flags); /* Kick the cleaner thread so it'll start deleting snapshots. 
*/ if (test_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags)) wake_up_process(fs_info->cleaner_kthread); return 0; fail_qgroup: btrfs_free_qgroup_config(fs_info); fail_trans_kthread: kthread_stop(fs_info->transaction_kthread); btrfs_cleanup_transaction(fs_info); btrfs_free_fs_roots(fs_info); fail_cleaner: kthread_stop(fs_info->cleaner_kthread); /* * make sure we're done with the btree inode before we stop our * kthreads */ filemap_write_and_wait(fs_info->btree_inode->i_mapping); fail_sysfs: btrfs_sysfs_remove_mounted(fs_info); fail_fsdev_sysfs: btrfs_sysfs_remove_fsid(fs_info->fs_devices); fail_block_groups: btrfs_put_block_group_cache(fs_info); fail_tree_roots: if (fs_info->data_reloc_root) btrfs_drop_and_free_fs_root(fs_info, fs_info->data_reloc_root); free_root_pointers(fs_info, true); invalidate_inode_pages2(fs_info->btree_inode->i_mapping); fail_sb_buffer: btrfs_stop_all_workers(fs_info); btrfs_free_block_groups(fs_info); fail_alloc: btrfs_mapping_tree_free(fs_info); iput(fs_info->btree_inode); fail: ASSERT(ret < 0); return ret; } ALLOW_ERROR_INJECTION(open_ctree, ERRNO); static void btrfs_end_super_write(struct bio *bio) { struct btrfs_device *device = bio->bi_private; struct folio_iter fi; bio_for_each_folio_all(fi, bio) { if (bio->bi_status) { btrfs_warn_rl(device->fs_info, "lost super block write due to IO error on %s (%d)", btrfs_dev_name(device), blk_status_to_errno(bio->bi_status)); btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS); /* Ensure failure if the primary sb fails. */ if (bio->bi_opf & REQ_FUA) atomic_add(BTRFS_SUPER_PRIMARY_WRITE_ERROR, &device->sb_write_errors); else atomic_inc(&device->sb_write_errors); } folio_unlock(fi.folio); folio_put(fi.folio); } bio_put(bio); } /* * Write superblock @sb to the @device. Do not wait for completion, all the * folios we use for writing are locked. * * Write @max_mirrors copies of the superblock, where 0 means default that fit * the expected device size at commit time. Note that max_mirrors must be * same for write and wait phases. * * Return number of errors when folio is not found or submission fails. 
*/ static int write_dev_supers(struct btrfs_device *device, struct btrfs_super_block *sb, int max_mirrors) { struct btrfs_fs_info *fs_info = device->fs_info; struct address_space *mapping = device->bdev->bd_mapping; SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); int i; int ret; u64 bytenr, bytenr_orig; atomic_set(&device->sb_write_errors, 0); if (max_mirrors == 0) max_mirrors = BTRFS_SUPER_MIRROR_MAX; shash->tfm = fs_info->csum_shash; for (i = 0; i < max_mirrors; i++) { struct folio *folio; struct bio *bio; struct btrfs_super_block *disk_super; size_t offset; bytenr_orig = btrfs_sb_offset(i); ret = btrfs_sb_log_location(device, i, WRITE, &bytenr); if (ret == -ENOENT) { continue; } else if (ret < 0) { btrfs_err(device->fs_info, "couldn't get super block location for mirror %d error %d", i, ret); atomic_inc(&device->sb_write_errors); continue; } if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->commit_total_bytes) break; btrfs_set_super_bytenr(sb, bytenr_orig); crypto_shash_digest(shash, (const char *)sb + BTRFS_CSUM_SIZE, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE, sb->csum); folio = __filemap_get_folio(mapping, bytenr >> PAGE_SHIFT, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS); if (IS_ERR(folio)) { btrfs_err(device->fs_info, "couldn't get super block page for bytenr %llu error %ld", bytenr, PTR_ERR(folio)); atomic_inc(&device->sb_write_errors); continue; } offset = offset_in_folio(folio, bytenr); disk_super = folio_address(folio) + offset; memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE); /* * Directly use bios here instead of relying on the page cache * to do I/O, so we don't lose the ability to do integrity * checking. */ bio = bio_alloc(device->bdev, 1, REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, GFP_NOFS); bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT; bio->bi_private = device; bio->bi_end_io = btrfs_end_super_write; bio_add_folio_nofail(bio, folio, BTRFS_SUPER_INFO_SIZE, offset); /* * We FUA only the first super block. The others we allow to * go down lazy and there's a short window where the on-disk * copies might still contain the older version. */ if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER)) bio->bi_opf |= REQ_FUA; submit_bio(bio); if (btrfs_advance_sb_log(device, i)) atomic_inc(&device->sb_write_errors); } return atomic_read(&device->sb_write_errors) < i ? 0 : -1; } /* * Wait for write completion of superblocks done by write_dev_supers, * @max_mirrors same for write and wait phases. * * Return -1 if primary super block write failed or when there were no super block * copies written. Otherwise 0. */ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) { int i; int errors = 0; bool primary_failed = false; int ret; u64 bytenr; if (max_mirrors == 0) max_mirrors = BTRFS_SUPER_MIRROR_MAX; for (i = 0; i < max_mirrors; i++) { struct folio *folio; ret = btrfs_sb_log_location(device, i, READ, &bytenr); if (ret == -ENOENT) { break; } else if (ret < 0) { errors++; if (i == 0) primary_failed = true; continue; } if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->commit_total_bytes) break; folio = filemap_get_folio(device->bdev->bd_mapping, bytenr >> PAGE_SHIFT); /* If the folio has been removed, then we know it completed. */ if (IS_ERR(folio)) continue; /* Folio will be unlocked once the write completes. 
*/ folio_wait_locked(folio); folio_put(folio); } errors += atomic_read(&device->sb_write_errors); if (errors >= BTRFS_SUPER_PRIMARY_WRITE_ERROR) primary_failed = true; if (primary_failed) { btrfs_err(device->fs_info, "error writing primary super block to device %llu", device->devid); return -1; } return errors < i ? 0 : -1; } /* * endio for the write_dev_flush, this will wake anyone waiting * for the barrier when it is done */ static void btrfs_end_empty_barrier(struct bio *bio) { bio_uninit(bio); complete(bio->bi_private); } /* * Submit a flush request to the device if it supports it. Error handling is * done in the waiting counterpart. */ static void write_dev_flush(struct btrfs_device *device) { struct bio *bio = &device->flush_bio; device->last_flush_error = BLK_STS_OK; bio_init(bio, device->bdev, NULL, 0, REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH); bio->bi_end_io = btrfs_end_empty_barrier; init_completion(&device->flush_wait); bio->bi_private = &device->flush_wait; submit_bio(bio); set_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state); } /* * If the flush bio has been submitted by write_dev_flush, wait for it. * Return true for any error, and false otherwise. */ static bool wait_dev_flush(struct btrfs_device *device) { struct bio *bio = &device->flush_bio; if (!test_and_clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state)) return false; wait_for_completion_io(&device->flush_wait); if (bio->bi_status) { device->last_flush_error = bio->bi_status; btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_FLUSH_ERRS); return true; } return false; } /* * send an empty flush down to each device in parallel, * then wait for them */ static int barrier_all_devices(struct btrfs_fs_info *info) { struct list_head *head; struct btrfs_device *dev; int errors_wait = 0; lockdep_assert_held(&info->fs_devices->device_list_mutex); /* send down all the barriers */ head = &info->fs_devices->devices; list_for_each_entry(dev, head, dev_list) { if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) continue; if (!dev->bdev) continue; if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; write_dev_flush(dev); } /* wait for all the barriers */ list_for_each_entry(dev, head, dev_list) { if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) continue; if (!dev->bdev) { errors_wait++; continue; } if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; if (wait_dev_flush(dev)) errors_wait++; } /* * Checks last_flush_error of disks in order to determine the device * state. */ if (unlikely(errors_wait && !btrfs_check_rw_degradable(info, NULL))) return -EIO; return 0; } int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags) { int raid_type; int min_tolerated = INT_MAX; if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 || (flags & BTRFS_AVAIL_ALLOC_BIT_SINGLE)) min_tolerated = min_t(int, min_tolerated, btrfs_raid_array[BTRFS_RAID_SINGLE]. tolerated_failures); for (raid_type = 0; raid_type < BTRFS_NR_RAID_TYPES; raid_type++) { if (raid_type == BTRFS_RAID_SINGLE) continue; if (!(flags & btrfs_raid_array[raid_type].bg_flag)) continue; min_tolerated = min_t(int, min_tolerated, btrfs_raid_array[raid_type]. 
tolerated_failures); } if (min_tolerated == INT_MAX) { btrfs_warn(NULL, "unknown raid flag: %llu", flags); min_tolerated = 0; } return min_tolerated; } int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) { struct list_head *head; struct btrfs_device *dev; struct btrfs_super_block *sb; struct btrfs_dev_item *dev_item; int ret; int do_barriers; int max_errors; int total_errors = 0; u64 flags; do_barriers = !btrfs_test_opt(fs_info, NOBARRIER); /* * max_mirrors == 0 indicates we're from commit_transaction, * not from fsync where the tree roots in fs_info have not * been consistent on disk. */ if (max_mirrors == 0) backup_super_roots(fs_info); sb = fs_info->super_for_commit; dev_item = &sb->dev_item; mutex_lock(&fs_info->fs_devices->device_list_mutex); head = &fs_info->fs_devices->devices; max_errors = btrfs_super_num_devices(fs_info->super_copy) - 1; if (do_barriers) { ret = barrier_all_devices(fs_info); if (ret) { mutex_unlock( &fs_info->fs_devices->device_list_mutex); btrfs_handle_fs_error(fs_info, ret, "errors while submitting device barriers."); return ret; } } list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) { total_errors++; continue; } if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; btrfs_set_stack_device_generation(dev_item, 0); btrfs_set_stack_device_type(dev_item, dev->type); btrfs_set_stack_device_id(dev_item, dev->devid); btrfs_set_stack_device_total_bytes(dev_item, dev->commit_total_bytes); btrfs_set_stack_device_bytes_used(dev_item, dev->commit_bytes_used); btrfs_set_stack_device_io_align(dev_item, dev->io_align); btrfs_set_stack_device_io_width(dev_item, dev->io_width); btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE); memcpy(dev_item->fsid, dev->fs_devices->metadata_uuid, BTRFS_FSID_SIZE); flags = btrfs_super_flags(sb); btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); ret = btrfs_validate_write_super(fs_info, sb); if (unlikely(ret < 0)) { mutex_unlock(&fs_info->fs_devices->device_list_mutex); btrfs_handle_fs_error(fs_info, -EUCLEAN, "unexpected superblock corruption detected"); return -EUCLEAN; } ret = write_dev_supers(dev, sb, max_mirrors); if (ret) total_errors++; } if (unlikely(total_errors > max_errors)) { btrfs_err(fs_info, "%d errors while writing supers", total_errors); mutex_unlock(&fs_info->fs_devices->device_list_mutex); /* FUA is masked off if unsupported and can't be the reason */ btrfs_handle_fs_error(fs_info, -EIO, "%d errors while writing supers", total_errors); return -EIO; } total_errors = 0; list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) continue; if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue; ret = wait_dev_supers(dev, max_mirrors); if (ret) total_errors++; } mutex_unlock(&fs_info->fs_devices->device_list_mutex); if (unlikely(total_errors > max_errors)) { btrfs_handle_fs_error(fs_info, -EIO, "%d errors while writing supers", total_errors); return -EIO; } return 0; } /* Drop a fs root from the radix tree and free it. 
*/ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) { bool drop_ref = false; spin_lock(&fs_info->fs_roots_radix_lock); radix_tree_delete(&fs_info->fs_roots_radix, (unsigned long)btrfs_root_id(root)); if (test_and_clear_bit(BTRFS_ROOT_IN_RADIX, &root->state)) drop_ref = true; spin_unlock(&fs_info->fs_roots_radix_lock); if (BTRFS_FS_ERROR(fs_info)) { ASSERT(root->log_root == NULL); if (root->reloc_root) { btrfs_put_root(root->reloc_root); root->reloc_root = NULL; } } if (drop_ref) btrfs_put_root(root); } int btrfs_commit_super(struct btrfs_fs_info *fs_info) { mutex_lock(&fs_info->cleaner_mutex); btrfs_run_delayed_iputs(fs_info); mutex_unlock(&fs_info->cleaner_mutex); wake_up_process(fs_info->cleaner_kthread); /* wait until ongoing cleanup work done */ down_write(&fs_info->cleanup_work_sem); up_write(&fs_info->cleanup_work_sem); return btrfs_commit_current_transaction(fs_info->tree_root); } static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info) { struct btrfs_transaction *trans; struct btrfs_transaction *tmp; bool found = false; /* * This function is only called at the very end of close_ctree(), * thus no other running transaction, no need to take trans_lock. */ ASSERT(test_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags)); list_for_each_entry_safe(trans, tmp, &fs_info->trans_list, list) { struct extent_state *cached = NULL; u64 dirty_bytes = 0; u64 cur = 0; u64 found_start; u64 found_end; found = true; while (btrfs_find_first_extent_bit(&trans->dirty_pages, cur, &found_start, &found_end, EXTENT_DIRTY, &cached)) { dirty_bytes += found_end + 1 - found_start; cur = found_end + 1; } btrfs_warn(fs_info, "transaction %llu (with %llu dirty metadata bytes) is not committed", trans->transid, dirty_bytes); btrfs_cleanup_one_transaction(trans); if (trans == fs_info->running_transaction) fs_info->running_transaction = NULL; list_del_init(&trans->list); btrfs_put_transaction(trans); trace_btrfs_transaction_commit(fs_info); } ASSERT(!found); } void __cold close_ctree(struct btrfs_fs_info *fs_info) { int ret; set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags); /* * If we had UNFINISHED_DROPS we could still be processing them, so * clear that bit and wake up relocation so it can stop. * We must do this before stopping the block group reclaim task, because * at btrfs_relocate_block_group() we wait for this bit, and after the * wait we stop with -EINTR if btrfs_fs_closing() returns non-zero - we * have just set BTRFS_FS_CLOSING_START, so btrfs_fs_closing() will * return 1. */ btrfs_wake_unfinished_drop(fs_info); /* * We may have the reclaim task running and relocating a data block group, * in which case it may create delayed iputs. So stop it before we park * the cleaner kthread otherwise we can get new delayed iputs after * parking the cleaner, and that can make the async reclaim task to hang * if it's waiting for delayed iputs to complete, since the cleaner is * parked and can not run delayed iputs - this will make us hang when * trying to stop the async reclaim task. */ cancel_work_sync(&fs_info->reclaim_bgs_work); /* * We don't want the cleaner to start new transactions, add more delayed * iputs, etc. while we're closing. We can't use kthread_stop() yet * because that frees the task_struct, and the transaction kthread might * still try to wake up the cleaner. 
*/ kthread_park(fs_info->cleaner_kthread); /* wait for the qgroup rescan worker to stop */ btrfs_qgroup_wait_for_completion(fs_info, false); /* wait for the uuid_scan task to finish */ down(&fs_info->uuid_tree_rescan_sem); /* avoid complains from lockdep et al., set sem back to initial state */ up(&fs_info->uuid_tree_rescan_sem); /* pause restriper - we want to resume on mount */ btrfs_pause_balance(fs_info); btrfs_dev_replace_suspend_for_unmount(fs_info); btrfs_scrub_cancel(fs_info); /* wait for any defraggers to finish */ wait_event(fs_info->transaction_wait, (atomic_read(&fs_info->defrag_running) == 0)); /* clear out the rbtree of defraggable inodes */ btrfs_cleanup_defrag_inodes(fs_info); /* * Handle the error fs first, as it will flush and wait for all ordered * extents. This will generate delayed iputs, thus we want to handle * it first. */ if (unlikely(BTRFS_FS_ERROR(fs_info))) btrfs_error_commit_super(fs_info); /* * Wait for any fixup workers to complete. * If we don't wait for them here and they are still running by the time * we call kthread_stop() against the cleaner kthread further below, we * get an use-after-free on the cleaner because the fixup worker adds an * inode to the list of delayed iputs and then attempts to wakeup the * cleaner kthread, which was already stopped and destroyed. We parked * already the cleaner, but below we run all pending delayed iputs. */ btrfs_flush_workqueue(fs_info->fixup_workers); /* * Similar case here, we have to wait for delalloc workers before we * proceed below and stop the cleaner kthread, otherwise we trigger a * use-after-tree on the cleaner kthread task_struct when a delalloc * worker running submit_compressed_extents() adds a delayed iput, which * does a wake up on the cleaner kthread, which was already freed below * when we call kthread_stop(). */ btrfs_flush_workqueue(fs_info->delalloc_workers); /* * We can have ordered extents getting their last reference dropped from * the fs_info->workers queue because for async writes for data bios we * queue a work for that queue, at btrfs_wq_submit_bio(), that runs * run_one_async_done() which calls btrfs_bio_end_io() in case the bio * has an error, and that later function can do the final * btrfs_put_ordered_extent() on the ordered extent attached to the bio, * which adds a delayed iput for the inode. So we must flush the queue * so that we don't have delayed iputs after committing the current * transaction below and stopping the cleaner and transaction kthreads. */ btrfs_flush_workqueue(fs_info->workers); /* * When finishing a compressed write bio we schedule a work queue item * to finish an ordered extent - end_bbio_compressed_write() * calls btrfs_finish_ordered_extent() which in turns does a call to * btrfs_queue_ordered_fn(), and that queues the ordered extent * completion either in the endio_write_workers work queue or in the * fs_info->endio_freespace_worker work queue. We flush those queues * below, so before we flush them we must flush this queue for the * workers of compressed writes. */ flush_workqueue(fs_info->endio_workers); /* * After we parked the cleaner kthread, ordered extents may have * completed and created new delayed iputs. If one of the async reclaim * tasks is running and in the RUN_DELAYED_IPUTS flush state, then we * can hang forever trying to stop it, because if a delayed iput is * added after it ran btrfs_run_delayed_iputs() and before it called * btrfs_wait_on_delayed_iputs(), it will hang forever since there is * no one else to run iputs. 
* * So wait for all ongoing ordered extents to complete and then run * delayed iputs. This works because once we reach this point no one * can create new ordered extents, but delayed iputs can still be added * by a reclaim worker (see comments further below). * * Also note that btrfs_wait_ordered_roots() is not safe here, because * it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent, * but the delayed iput for the respective inode is made only when doing * the final btrfs_put_ordered_extent() (which must happen at * btrfs_finish_ordered_io() when we are unmounting). */ btrfs_flush_workqueue(fs_info->endio_write_workers); /* Ordered extents for free space inodes. */ btrfs_flush_workqueue(fs_info->endio_freespace_worker); /* * Run delayed iputs in case an async reclaim worker is waiting for them * to be run as mentioned above. */ btrfs_run_delayed_iputs(fs_info); cancel_work_sync(&fs_info->async_reclaim_work); cancel_work_sync(&fs_info->async_data_reclaim_work); cancel_work_sync(&fs_info->preempt_reclaim_work); cancel_work_sync(&fs_info->em_shrinker_work); /* * Run delayed iputs again because an async reclaim worker may have * added new ones if it was flushing delalloc: * * shrink_delalloc() -> btrfs_start_delalloc_roots() -> * start_delalloc_inodes() -> btrfs_add_delayed_iput() */ btrfs_run_delayed_iputs(fs_info); /* There should be no more workload to generate new delayed iputs. */ set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state); /* Cancel or finish ongoing discard work */ btrfs_discard_cleanup(fs_info); if (!sb_rdonly(fs_info->sb)) { /* * The cleaner kthread is stopped, so do one final pass over * unused block groups. */ btrfs_delete_unused_bgs(fs_info); /* * There might be existing delayed inode workers still running * and holding an empty delayed inode item. We must wait for * them to complete first because they can create a transaction. * This happens when someone calls btrfs_balance_delayed_items() * and then a transaction commit runs the same delayed nodes * before any delayed worker has done something with the nodes. * We must wait for any worker here and not at transaction * commit time since that could cause a deadlock. * This is a very rare case. */ btrfs_flush_workqueue(fs_info->delayed_workers); ret = btrfs_commit_super(fs_info); if (ret) btrfs_err(fs_info, "commit super ret %d", ret); } kthread_stop(fs_info->transaction_kthread); kthread_stop(fs_info->cleaner_kthread); ASSERT(list_empty(&fs_info->delayed_iputs)); set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags); if (btrfs_check_quota_leak(fs_info)) { DEBUG_WARN("qgroup reserved space leaked"); btrfs_err(fs_info, "qgroup reserved space leaked"); } btrfs_free_qgroup_config(fs_info); ASSERT(list_empty(&fs_info->delalloc_roots)); if (percpu_counter_sum(&fs_info->delalloc_bytes)) { btrfs_info(fs_info, "at unmount delalloc count %lld", percpu_counter_sum(&fs_info->delalloc_bytes)); } if (percpu_counter_sum(&fs_info->ordered_bytes)) btrfs_info(fs_info, "at unmount dio bytes count %lld", percpu_counter_sum(&fs_info->ordered_bytes)); btrfs_sysfs_remove_mounted(fs_info); btrfs_sysfs_remove_fsid(fs_info->fs_devices); btrfs_put_block_group_cache(fs_info); /* * we must make sure there is not any read request to * submit after we stopping all workers. 
*/ invalidate_inode_pages2(fs_info->btree_inode->i_mapping); btrfs_stop_all_workers(fs_info); /* We shouldn't have any transaction open at this point */ warn_about_uncommitted_trans(fs_info); clear_bit(BTRFS_FS_OPEN, &fs_info->flags); free_root_pointers(fs_info, true); btrfs_free_fs_roots(fs_info); /* * We must free the block groups after dropping the fs_roots as we could * have had an IO error and have left over tree log blocks that aren't * cleaned up until the fs roots are freed. This makes the block group * accounting appear to be wrong because there's pending reserved bytes, * so make sure we do the block group cleanup afterwards. */ btrfs_free_block_groups(fs_info); iput(fs_info->btree_inode); btrfs_mapping_tree_free(fs_info); } void btrfs_mark_buffer_dirty(struct btrfs_trans_handle *trans, struct extent_buffer *buf) { struct btrfs_fs_info *fs_info = buf->fs_info; u64 transid = btrfs_header_generation(buf); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS /* * This is a fast path so only do this check if we have sanity tests * enabled. Normal people shouldn't be using unmapped buffers as dirty * outside of the sanity tests. */ if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags))) return; #endif /* This is an active transaction (its state < TRANS_STATE_UNBLOCKED). */ ASSERT(trans->transid == fs_info->generation); btrfs_assert_tree_write_locked(buf); if (unlikely(transid != fs_info->generation)) { btrfs_abort_transaction(trans, -EUCLEAN); btrfs_crit(fs_info, "dirty buffer transid mismatch, logical %llu found transid %llu running transid %llu", buf->start, transid, fs_info->generation); } set_extent_buffer_dirty(buf); } static void __btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info, int flush_delayed) { /* * looks as though older kernels can get into trouble with * this code, they end up stuck in balance_dirty_pages forever */ int ret; if (current->flags & PF_MEMALLOC) return; if (flush_delayed) btrfs_balance_delayed_items(fs_info); ret = __percpu_counter_compare(&fs_info->dirty_metadata_bytes, BTRFS_DIRTY_METADATA_THRESH, fs_info->dirty_metadata_batch); if (ret > 0) { balance_dirty_pages_ratelimited(fs_info->btree_inode->i_mapping); } } void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info) { __btrfs_btree_balance_dirty(fs_info, 1); } void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info) { __btrfs_btree_balance_dirty(fs_info, 0); } static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info) { /* cleanup FS via transaction */ btrfs_cleanup_transaction(fs_info); down_write(&fs_info->cleanup_work_sem); up_write(&fs_info->cleanup_work_sem); } static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info) { struct btrfs_root *gang[8]; u64 root_objectid = 0; int ret; spin_lock(&fs_info->fs_roots_radix_lock); while ((ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix, (void **)gang, root_objectid, ARRAY_SIZE(gang))) != 0) { int i; for (i = 0; i < ret; i++) gang[i] = btrfs_grab_root(gang[i]); spin_unlock(&fs_info->fs_roots_radix_lock); for (i = 0; i < ret; i++) { if (!gang[i]) continue; root_objectid = btrfs_root_id(gang[i]); btrfs_free_log(NULL, gang[i]); btrfs_put_root(gang[i]); } root_objectid++; spin_lock(&fs_info->fs_roots_radix_lock); } spin_unlock(&fs_info->fs_roots_radix_lock); btrfs_free_log_root_tree(NULL, fs_info); } static void btrfs_destroy_ordered_extents(struct btrfs_root *root) { struct btrfs_ordered_extent *ordered; spin_lock(&root->ordered_extent_lock); /* * This will just short circuit the ordered completion stuff which will * make 
sure the ordered extent gets properly cleaned up. */ list_for_each_entry(ordered, &root->ordered_extents, root_extent_list) set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); spin_unlock(&root->ordered_extent_lock); } static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) { struct btrfs_root *root; LIST_HEAD(splice); spin_lock(&fs_info->ordered_root_lock); list_splice_init(&fs_info->ordered_roots, &splice); while (!list_empty(&splice)) { root = list_first_entry(&splice, struct btrfs_root, ordered_root); list_move_tail(&root->ordered_root, &fs_info->ordered_roots); spin_unlock(&fs_info->ordered_root_lock); btrfs_destroy_ordered_extents(root); cond_resched(); spin_lock(&fs_info->ordered_root_lock); } spin_unlock(&fs_info->ordered_root_lock); /* * We need this here because if we've been flipped read-only we won't * get sync() from the umount, so we need to make sure any ordered * extents that haven't had their dirty pages IO start writeout yet * actually get run and error out properly. */ btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL); } static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) { struct btrfs_inode *btrfs_inode; LIST_HEAD(splice); spin_lock(&root->delalloc_lock); list_splice_init(&root->delalloc_inodes, &splice); while (!list_empty(&splice)) { struct inode *inode = NULL; btrfs_inode = list_first_entry(&splice, struct btrfs_inode, delalloc_inodes); btrfs_del_delalloc_inode(btrfs_inode); spin_unlock(&root->delalloc_lock); /* * Make sure we get a live inode and that it'll not disappear * meanwhile. */ inode = igrab(&btrfs_inode->vfs_inode); if (inode) { unsigned int nofs_flag; nofs_flag = memalloc_nofs_save(); invalidate_inode_pages2(inode->i_mapping); memalloc_nofs_restore(nofs_flag); iput(inode); } spin_lock(&root->delalloc_lock); } spin_unlock(&root->delalloc_lock); } static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info) { struct btrfs_root *root; LIST_HEAD(splice); spin_lock(&fs_info->delalloc_root_lock); list_splice_init(&fs_info->delalloc_roots, &splice); while (!list_empty(&splice)) { root = list_first_entry(&splice, struct btrfs_root, delalloc_root); root = btrfs_grab_root(root); BUG_ON(!root); spin_unlock(&fs_info->delalloc_root_lock); btrfs_destroy_delalloc_inodes(root); btrfs_put_root(root); spin_lock(&fs_info->delalloc_root_lock); } spin_unlock(&fs_info->delalloc_root_lock); } static void btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info, struct extent_io_tree *dirty_pages, int mark) { struct extent_buffer *eb; u64 start = 0; u64 end; while (btrfs_find_first_extent_bit(dirty_pages, start, &start, &end, mark, NULL)) { btrfs_clear_extent_bit(dirty_pages, start, end, mark, NULL); while (start <= end) { eb = find_extent_buffer(fs_info, start); start += fs_info->nodesize; if (!eb) continue; btrfs_tree_lock(eb); wait_on_extent_buffer_writeback(eb); btrfs_clear_buffer_dirty(NULL, eb); btrfs_tree_unlock(eb); free_extent_buffer_stale(eb); } } } static void btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info, struct extent_io_tree *unpin) { u64 start; u64 end; while (1) { struct extent_state *cached_state = NULL; /* * The btrfs_finish_extent_commit() may get the same range as * ours between find_first_extent_bit and clear_extent_dirty. * Hence, hold the unused_bg_unpin_mutex to avoid double unpin * the same extent range. 
*/ mutex_lock(&fs_info->unused_bg_unpin_mutex); if (!btrfs_find_first_extent_bit(unpin, 0, &start, &end, EXTENT_DIRTY, &cached_state)) { mutex_unlock(&fs_info->unused_bg_unpin_mutex); break; } btrfs_clear_extent_dirty(unpin, start, end, &cached_state); btrfs_free_extent_state(cached_state); btrfs_error_unpin_extent_range(fs_info, start, end); mutex_unlock(&fs_info->unused_bg_unpin_mutex); cond_resched(); } } static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache) { struct inode *inode; inode = cache->io_ctl.inode; if (inode) { unsigned int nofs_flag; nofs_flag = memalloc_nofs_save(); invalidate_inode_pages2(inode->i_mapping); memalloc_nofs_restore(nofs_flag); BTRFS_I(inode)->generation = 0; cache->io_ctl.inode = NULL; iput(inode); } ASSERT(cache->io_ctl.pages == NULL); btrfs_put_block_group(cache); } void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans, struct btrfs_fs_info *fs_info) { struct btrfs_block_group *cache; spin_lock(&cur_trans->dirty_bgs_lock); while (!list_empty(&cur_trans->dirty_bgs)) { cache = list_first_entry(&cur_trans->dirty_bgs, struct btrfs_block_group, dirty_list); if (!list_empty(&cache->io_list)) { spin_unlock(&cur_trans->dirty_bgs_lock); list_del_init(&cache->io_list); btrfs_cleanup_bg_io(cache); spin_lock(&cur_trans->dirty_bgs_lock); } list_del_init(&cache->dirty_list); spin_lock(&cache->lock); cache->disk_cache_state = BTRFS_DC_ERROR; spin_unlock(&cache->lock); spin_unlock(&cur_trans->dirty_bgs_lock); btrfs_put_block_group(cache); btrfs_dec_delayed_refs_rsv_bg_updates(fs_info); spin_lock(&cur_trans->dirty_bgs_lock); } spin_unlock(&cur_trans->dirty_bgs_lock); /* * Refer to the definition of io_bgs member for details why it's safe * to use it without any locking */ while (!list_empty(&cur_trans->io_bgs)) { cache = list_first_entry(&cur_trans->io_bgs, struct btrfs_block_group, io_list); list_del_init(&cache->io_list); spin_lock(&cache->lock); cache->disk_cache_state = BTRFS_DC_ERROR; spin_unlock(&cache->lock); btrfs_cleanup_bg_io(cache); } } static void btrfs_free_all_qgroup_pertrans(struct btrfs_fs_info *fs_info) { struct btrfs_root *gang[8]; int i; int ret; spin_lock(&fs_info->fs_roots_radix_lock); while (1) { ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, (void **)gang, 0, ARRAY_SIZE(gang), BTRFS_ROOT_TRANS_TAG); if (ret == 0) break; for (i = 0; i < ret; i++) { struct btrfs_root *root = gang[i]; btrfs_qgroup_free_meta_all_pertrans(root); radix_tree_tag_clear(&fs_info->fs_roots_radix, (unsigned long)btrfs_root_id(root), BTRFS_ROOT_TRANS_TAG); } } spin_unlock(&fs_info->fs_roots_radix_lock); } void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans) { struct btrfs_fs_info *fs_info = cur_trans->fs_info; struct btrfs_device *dev, *tmp; btrfs_cleanup_dirty_bgs(cur_trans, fs_info); ASSERT(list_empty(&cur_trans->dirty_bgs)); ASSERT(list_empty(&cur_trans->io_bgs)); list_for_each_entry_safe(dev, tmp, &cur_trans->dev_update_list, post_commit_list) { list_del_init(&dev->post_commit_list); } btrfs_destroy_delayed_refs(cur_trans); cur_trans->state = TRANS_STATE_COMMIT_START; wake_up(&fs_info->transaction_blocked_wait); cur_trans->state = TRANS_STATE_UNBLOCKED; wake_up(&fs_info->transaction_wait); btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages, EXTENT_DIRTY); btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents); cur_trans->state =TRANS_STATE_COMPLETED; wake_up(&cur_trans->commit_wait); } static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) { struct btrfs_transaction *t; 
mutex_lock(&fs_info->transaction_kthread_mutex); spin_lock(&fs_info->trans_lock); while (!list_empty(&fs_info->trans_list)) { t = list_first_entry(&fs_info->trans_list, struct btrfs_transaction, list); if (t->state >= TRANS_STATE_COMMIT_PREP) { refcount_inc(&t->use_count); spin_unlock(&fs_info->trans_lock); btrfs_wait_for_commit(fs_info, t->transid); btrfs_put_transaction(t); spin_lock(&fs_info->trans_lock); continue; } if (t == fs_info->running_transaction) { t->state = TRANS_STATE_COMMIT_DOING; spin_unlock(&fs_info->trans_lock); /* * We wait for 0 num_writers since we don't hold a trans * handle open currently for this transaction. */ wait_event(t->writer_wait, atomic_read(&t->num_writers) == 0); } else { spin_unlock(&fs_info->trans_lock); } btrfs_cleanup_one_transaction(t); spin_lock(&fs_info->trans_lock); if (t == fs_info->running_transaction) fs_info->running_transaction = NULL; list_del_init(&t->list); spin_unlock(&fs_info->trans_lock); btrfs_put_transaction(t); trace_btrfs_transaction_commit(fs_info); spin_lock(&fs_info->trans_lock); } spin_unlock(&fs_info->trans_lock); btrfs_destroy_all_ordered_extents(fs_info); btrfs_destroy_delayed_inodes(fs_info); btrfs_assert_delayed_root_empty(fs_info); btrfs_destroy_all_delalloc_inodes(fs_info); btrfs_drop_all_logs(fs_info); btrfs_free_all_qgroup_pertrans(fs_info); mutex_unlock(&fs_info->transaction_kthread_mutex); return 0; } int btrfs_init_root_free_objectid(struct btrfs_root *root) { BTRFS_PATH_AUTO_FREE(path); int ret; struct extent_buffer *l; struct btrfs_key search_key; struct btrfs_key found_key; int slot; path = btrfs_alloc_path(); if (!path) return -ENOMEM; search_key.objectid = BTRFS_LAST_FREE_OBJECTID; search_key.type = -1; search_key.offset = (u64)-1; ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); if (ret < 0) return ret; if (unlikely(ret == 0)) { /* * Key with offset -1 found, there would have to exist a root * with such id, but this is out of valid range. */ return -EUCLEAN; } if (path->slots[0] > 0) { slot = path->slots[0] - 1; l = path->nodes[0]; btrfs_item_key_to_cpu(l, &found_key, slot); root->free_objectid = max_t(u64, found_key.objectid + 1, BTRFS_FIRST_FREE_OBJECTID); } else { root->free_objectid = BTRFS_FIRST_FREE_OBJECTID; } return 0; } int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid) { int ret; mutex_lock(&root->objectid_mutex); if (unlikely(root->free_objectid >= BTRFS_LAST_FREE_OBJECTID)) { btrfs_warn(root->fs_info, "the objectid of root %llu reaches its highest value", btrfs_root_id(root)); ret = -ENOSPC; goto out; } *objectid = root->free_objectid++; ret = 0; out: mutex_unlock(&root->objectid_mutex); return ret; }
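/*
 * Illustrative sketch only (assumed caller, not part of the original file):
 * how a new object ID is typically pulled from the per-root counter that
 * btrfs_init_root_free_objectid() seeds above, e.g. when creating an inode
 * item or a new subvolume root.
 */
static int example_reserve_objectid(struct btrfs_root *root, u64 *objectid)
{
	int ret;

	/* Serialized internally on root->objectid_mutex. */
	ret = btrfs_get_free_objectid(root, objectid);
	if (ret) {
		/* -ENOSPC once free_objectid reaches BTRFS_LAST_FREE_OBJECTID. */
		return ret;
	}

	/* The returned id is unique within this root for the life of the fs. */
	return 0;
}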
// SPDX-License-Identifier: GPL-2.0-only
/*
 * AT and PS/2 keyboard driver
 *
 * Copyright (c) 1999-2002 Vojtech Pavlik
 */

/*
 * This driver can handle standard AT keyboards and PS/2 keyboards in
 * Translated and Raw Set 2 and Set 3, as well as AT keyboards on dumb
 * input-only controllers and AT keyboards connected over a one way RS232
 * converter.
*/ #include <linux/delay.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/init.h> #include <linux/input.h> #include <linux/input/vivaldi-fmap.h> #include <linux/serio.h> #include <linux/workqueue.h> #include <linux/libps2.h> #include <linux/mutex.h> #include <linux/dmi.h> #include <linux/property.h> #define DRIVER_DESC "AT and PS/2 keyboard driver" MODULE_AUTHOR("Vojtech Pavlik <vojtech@suse.cz>"); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); static int atkbd_set = 2; module_param_named(set, atkbd_set, int, 0); MODULE_PARM_DESC(set, "Select keyboard code set (2 = default, 3 = PS/2 native)"); #if defined(__i386__) || defined(__x86_64__) || defined(__hppa__) || defined(__loongarch__) static bool atkbd_reset; #else static bool atkbd_reset = true; #endif module_param_named(reset, atkbd_reset, bool, 0); MODULE_PARM_DESC(reset, "Reset keyboard during initialization"); static bool atkbd_softrepeat; module_param_named(softrepeat, atkbd_softrepeat, bool, 0); MODULE_PARM_DESC(softrepeat, "Use software keyboard repeat"); static bool atkbd_softraw = true; module_param_named(softraw, atkbd_softraw, bool, 0); MODULE_PARM_DESC(softraw, "Use software generated rawmode"); static bool atkbd_scroll; module_param_named(scroll, atkbd_scroll, bool, 0); MODULE_PARM_DESC(scroll, "Enable scroll-wheel on MS Office and similar keyboards"); static bool atkbd_extra; module_param_named(extra, atkbd_extra, bool, 0); MODULE_PARM_DESC(extra, "Enable extra LEDs and keys on IBM RapidAcces, EzKey and similar keyboards"); static bool atkbd_terminal; module_param_named(terminal, atkbd_terminal, bool, 0); MODULE_PARM_DESC(terminal, "Enable break codes on an IBM Terminal keyboard connected via AT/PS2"); #define SCANCODE(keymap) ((keymap >> 16) & 0xFFFF) #define KEYCODE(keymap) (keymap & 0xFFFF) /* * Scancode to keycode tables. These are just the default setting, and * are loadable via a userland utility. 
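 *
 * Worked example of the SCANCODE()/KEYCODE() packing defined above
 * (illustrative value): a packed keymap entry of 0x001e001f carries the
 * scancode in the high 16 bits and the keycode in the low 16 bits, so
 * SCANCODE(0x001e001f) == 0x001e and KEYCODE(0x001e001f) == 0x001f.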
*/ #define ATKBD_KEYMAP_SIZE 512 static const unsigned short atkbd_set2_keycode[ATKBD_KEYMAP_SIZE] = { #ifdef CONFIG_KEYBOARD_ATKBD_HP_KEYCODES /* XXX: need a more general approach */ #include "hpps2atkbd.h" /* include the keyboard scancodes */ #else 0, 67, 65, 63, 61, 59, 60, 88,183, 68, 66, 64, 62, 15, 41,117, 184, 56, 42, 93, 29, 16, 2, 0,185, 0, 44, 31, 30, 17, 3, 0, 186, 46, 45, 32, 18, 5, 4, 95,187, 57, 47, 33, 20, 19, 6,183, 188, 49, 48, 35, 34, 21, 7,184,189, 0, 50, 36, 22, 8, 9,185, 190, 51, 37, 23, 24, 11, 10, 0,191, 52, 53, 38, 39, 25, 12, 0, 192, 89, 40, 0, 26, 13, 0,193, 58, 54, 28, 27, 0, 43, 0,194, 0, 86, 91, 90, 92, 0, 14, 94, 0, 79,124, 75, 71,121, 0, 0, 82, 83, 80, 76, 77, 72, 1, 69, 87, 78, 81, 74, 55, 73, 70, 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 217,100,255, 0, 97,165, 0, 0,156, 0, 0, 0, 0, 0, 0,125, 173,114, 0,113, 0, 0, 0,126,128, 0, 0,140, 0, 0, 0,127, 159, 0,115, 0,164, 0, 0,116,158, 0,172,166, 0, 0, 0,142, 157, 0, 0, 0, 0, 0, 0, 0,155, 0, 98, 0, 0,163, 0, 0, 226, 0, 0, 0, 0, 0, 0, 0, 0,255, 96, 0, 0, 0,143, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,107, 0,105,102, 0, 0,112, 110,111,108,112,106,103, 0,119, 0,118,109, 0, 99,104,119, 0, 0, 0, 0, 65, 99, #endif }; static const unsigned short atkbd_set3_keycode[ATKBD_KEYMAP_SIZE] = { 0, 0, 0, 0, 0, 0, 0, 59, 1,138,128,129,130, 15, 41, 60, 131, 29, 42, 86, 58, 16, 2, 61,133, 56, 44, 31, 30, 17, 3, 62, 134, 46, 45, 32, 18, 5, 4, 63,135, 57, 47, 33, 20, 19, 6, 64, 136, 49, 48, 35, 34, 21, 7, 65,137,100, 50, 36, 22, 8, 9, 66, 125, 51, 37, 23, 24, 11, 10, 67,126, 52, 53, 38, 39, 25, 12, 68, 113,114, 40, 43, 26, 13, 87, 99, 97, 54, 28, 27, 43, 43, 88, 70, 108,105,119,103,111,107, 14,110, 0, 79,106, 75, 71,109,102,104, 82, 83, 80, 76, 77, 72, 69, 98, 0, 96, 81, 0, 78, 73, 55,183, 184,185,186,187, 74, 94, 92, 93, 0, 0, 0,125,126,127,112, 0, 0,139,172,163,165,115,152,172,166,140,160,154,113,114,167,168, 148,149,147,140 }; static const unsigned short atkbd_unxlate_table[128] = { 0,118, 22, 30, 38, 37, 46, 54, 61, 62, 70, 69, 78, 85,102, 13, 21, 29, 36, 45, 44, 53, 60, 67, 68, 77, 84, 91, 90, 20, 28, 27, 35, 43, 52, 51, 59, 66, 75, 76, 82, 14, 18, 93, 26, 34, 33, 42, 50, 49, 58, 65, 73, 74, 89,124, 17, 41, 88, 5, 6, 4, 12, 3, 11, 2, 10, 1, 9,119,126,108,117,125,123,107,115,116,121,105, 114,122,112,113,127, 96, 97,120, 7, 15, 23, 31, 39, 47, 55, 63, 71, 79, 86, 94, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 87,111, 19, 25, 57, 81, 83, 92, 95, 98, 99,100,101,103,104,106,109,110 }; #define ATKBD_CMD_SETLEDS 0x10ed #define ATKBD_CMD_GSCANSET 0x11f0 #define ATKBD_CMD_SSCANSET 0x10f0 #define ATKBD_CMD_GETID 0x02f2 #define ATKBD_CMD_SETREP 0x10f3 #define ATKBD_CMD_ENABLE 0x00f4 #define ATKBD_CMD_RESET_DIS 0x00f5 /* Reset to defaults and disable */ #define ATKBD_CMD_RESET_DEF 0x00f6 /* Reset to defaults */ #define ATKBD_CMD_SETALL_MB 0x00f8 /* Set all keys to give break codes */ #define ATKBD_CMD_SETALL_MBR 0x00fa /* ... 
and repeat */ #define ATKBD_CMD_RESET_BAT 0x02ff #define ATKBD_CMD_RESEND 0x00fe #define ATKBD_CMD_EX_ENABLE 0x10ea #define ATKBD_CMD_EX_SETLEDS 0x20eb #define ATKBD_CMD_OK_GETID 0x02e8 #define ATKBD_RET_ACK 0xfa #define ATKBD_RET_NAK 0xfe #define ATKBD_RET_BAT 0xaa #define ATKBD_RET_EMUL0 0xe0 #define ATKBD_RET_EMUL1 0xe1 #define ATKBD_RET_RELEASE 0xf0 #define ATKBD_RET_HANJA 0xf1 #define ATKBD_RET_HANGEUL 0xf2 #define ATKBD_RET_ERR 0xff #define ATKBD_KEY_UNKNOWN 0 #define ATKBD_KEY_NULL 255 #define ATKBD_SCR_1 0xfffe #define ATKBD_SCR_2 0xfffd #define ATKBD_SCR_4 0xfffc #define ATKBD_SCR_8 0xfffb #define ATKBD_SCR_CLICK 0xfffa #define ATKBD_SCR_LEFT 0xfff9 #define ATKBD_SCR_RIGHT 0xfff8 #define ATKBD_SPECIAL ATKBD_SCR_RIGHT #define ATKBD_LED_EVENT_BIT 0 #define ATKBD_REP_EVENT_BIT 1 #define ATKBD_XL_ERR 0x01 #define ATKBD_XL_BAT 0x02 #define ATKBD_XL_ACK 0x04 #define ATKBD_XL_NAK 0x08 #define ATKBD_XL_HANGEUL 0x10 #define ATKBD_XL_HANJA 0x20 static const struct { unsigned short keycode; unsigned char set2; } atkbd_scroll_keys[] = { { ATKBD_SCR_1, 0xc5 }, { ATKBD_SCR_2, 0x9d }, { ATKBD_SCR_4, 0xa4 }, { ATKBD_SCR_8, 0x9b }, { ATKBD_SCR_CLICK, 0xe0 }, { ATKBD_SCR_LEFT, 0xcb }, { ATKBD_SCR_RIGHT, 0xd2 }, }; /* * The atkbd control structure */ struct atkbd { struct ps2dev ps2dev; struct input_dev *dev; /* Written only during init */ char name[64]; char phys[32]; unsigned short id; unsigned short keycode[ATKBD_KEYMAP_SIZE]; DECLARE_BITMAP(force_release_mask, ATKBD_KEYMAP_SIZE); unsigned char set; bool translated; bool extra; bool write; bool softrepeat; bool softraw; bool scroll; bool enabled; /* Accessed only from interrupt */ unsigned char emul; bool resend; bool release; unsigned long xl_bit; unsigned int last; unsigned long time; unsigned long err_count; struct delayed_work event_work; unsigned long event_jiffies; unsigned long event_mask; /* Serializes reconnect(), attr->set() and event work */ struct mutex mutex; struct vivaldi_data vdata; }; /* * System-specific keymap fixup routine */ static void (*atkbd_platform_fixup)(struct atkbd *, const void *data); static void *atkbd_platform_fixup_data; static unsigned int (*atkbd_platform_scancode_fixup)(struct atkbd *, unsigned int); /* * Certain keyboards to not like ATKBD_CMD_RESET_DIS and stop responding * to many commands until full reset (ATKBD_CMD_RESET_BAT) is performed. 
*/ static bool atkbd_skip_deactivate; static ssize_t atkbd_attr_show_helper(struct device *dev, char *buf, ssize_t (*handler)(struct atkbd *, char *)); static ssize_t atkbd_attr_set_helper(struct device *dev, const char *buf, size_t count, ssize_t (*handler)(struct atkbd *, const char *, size_t)); #define ATKBD_DEFINE_ATTR(_name) \ static ssize_t atkbd_show_##_name(struct atkbd *, char *); \ static ssize_t atkbd_set_##_name(struct atkbd *, const char *, size_t); \ static ssize_t atkbd_do_show_##_name(struct device *d, \ struct device_attribute *attr, char *b) \ { \ return atkbd_attr_show_helper(d, b, atkbd_show_##_name); \ } \ static ssize_t atkbd_do_set_##_name(struct device *d, \ struct device_attribute *attr, const char *b, size_t s) \ { \ return atkbd_attr_set_helper(d, b, s, atkbd_set_##_name); \ } \ static struct device_attribute atkbd_attr_##_name = \ __ATTR(_name, S_IWUSR | S_IRUGO, atkbd_do_show_##_name, atkbd_do_set_##_name); ATKBD_DEFINE_ATTR(extra); ATKBD_DEFINE_ATTR(force_release); ATKBD_DEFINE_ATTR(scroll); ATKBD_DEFINE_ATTR(set); ATKBD_DEFINE_ATTR(softrepeat); ATKBD_DEFINE_ATTR(softraw); #define ATKBD_DEFINE_RO_ATTR(_name) \ static ssize_t atkbd_show_##_name(struct atkbd *, char *); \ static ssize_t atkbd_do_show_##_name(struct device *d, \ struct device_attribute *attr, char *b) \ { \ return atkbd_attr_show_helper(d, b, atkbd_show_##_name); \ } \ static struct device_attribute atkbd_attr_##_name = \ __ATTR(_name, S_IRUGO, atkbd_do_show_##_name, NULL); ATKBD_DEFINE_RO_ATTR(err_count); ATKBD_DEFINE_RO_ATTR(function_row_physmap); static struct attribute *atkbd_attributes[] = { &atkbd_attr_extra.attr, &atkbd_attr_force_release.attr, &atkbd_attr_scroll.attr, &atkbd_attr_set.attr, &atkbd_attr_softrepeat.attr, &atkbd_attr_softraw.attr, &atkbd_attr_err_count.attr, &atkbd_attr_function_row_physmap.attr, NULL }; static ssize_t atkbd_show_function_row_physmap(struct atkbd *atkbd, char *buf) { return vivaldi_function_row_physmap_show(&atkbd->vdata, buf); } static struct atkbd *atkbd_from_serio(struct serio *serio) { struct ps2dev *ps2dev = serio_get_drvdata(serio); return container_of(ps2dev, struct atkbd, ps2dev); } static umode_t atkbd_attr_is_visible(struct kobject *kobj, struct attribute *attr, int i) { struct device *dev = kobj_to_dev(kobj); struct serio *serio = to_serio_port(dev); struct atkbd *atkbd = atkbd_from_serio(serio); if (attr == &atkbd_attr_function_row_physmap.attr && !atkbd->vdata.num_function_row_keys) return 0; return attr->mode; } static const struct attribute_group atkbd_attribute_group = { .attrs = atkbd_attributes, .is_visible = atkbd_attr_is_visible, }; __ATTRIBUTE_GROUPS(atkbd_attribute); static const unsigned int xl_table[] = { ATKBD_RET_BAT, ATKBD_RET_ERR, ATKBD_RET_ACK, ATKBD_RET_NAK, ATKBD_RET_HANJA, ATKBD_RET_HANGEUL, }; /* * Checks if we should mangle the scancode to extract 'release' bit * in translated mode. */ static bool atkbd_need_xlate(unsigned long xl_bit, unsigned char code) { int i; if (code == ATKBD_RET_EMUL0 || code == ATKBD_RET_EMUL1) return false; for (i = 0; i < ARRAY_SIZE(xl_table); i++) if (code == xl_table[i]) return test_bit(i, &xl_bit); return true; } /* * Calculates new value of xl_bit so the driver can distinguish * between make/break pair of scancodes for select keys and PS/2 * protocol responses. 
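 *
 * Illustrative example (assumed scancode values): if a key's translated make
 * code is 0x72, its break code arrives as 0xf2, which collides with the
 * ATKBD_RET_HANGEUL response byte. Seeing the 0x72 make sets the matching
 * xl_bit, so a following 0xf2 is mangled into the release of 0x72 instead of
 * being passed through unmangled as a raw 0xf2 response.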
*/ static void atkbd_calculate_xl_bit(struct atkbd *atkbd, unsigned char code) { int i; for (i = 0; i < ARRAY_SIZE(xl_table); i++) { if (!((code ^ xl_table[i]) & 0x7f)) { if (code & 0x80) __clear_bit(i, &atkbd->xl_bit); else __set_bit(i, &atkbd->xl_bit); break; } } } /* * Encode the scancode, 0xe0 prefix, and high bit into a single integer, * keeping kernel 2.4 compatibility for set 2 */ static unsigned int atkbd_compat_scancode(struct atkbd *atkbd, unsigned int code) { if (atkbd->set == 3) { if (atkbd->emul == 1) code |= 0x100; } else { code = (code & 0x7f) | ((code & 0x80) << 1); if (atkbd->emul == 1) code |= 0x80; } return code; } /* * Tries to handle frame or parity error by requesting the keyboard controller * to resend the last byte. This historically not done on x86 as controllers * there typically do not implement this command. */ static bool __maybe_unused atkbd_handle_frame_error(struct ps2dev *ps2dev, u8 data, unsigned int flags) { struct atkbd *atkbd = container_of(ps2dev, struct atkbd, ps2dev); struct serio *serio = ps2dev->serio; if ((flags & (SERIO_FRAME | SERIO_PARITY)) && (~flags & SERIO_TIMEOUT) && !atkbd->resend && atkbd->write) { dev_warn(&serio->dev, "Frame/parity error: %02x\n", flags); serio_write(serio, ATKBD_CMD_RESEND); atkbd->resend = true; return true; } if (!flags && data == ATKBD_RET_ACK) atkbd->resend = false; return false; } static enum ps2_disposition atkbd_pre_receive_byte(struct ps2dev *ps2dev, u8 data, unsigned int flags) { struct serio *serio = ps2dev->serio; dev_dbg(&serio->dev, "Received %02x flags %02x\n", data, flags); #if !defined(__i386__) && !defined (__x86_64__) if (atkbd_handle_frame_error(ps2dev, data, flags)) return PS2_IGNORE; #endif return PS2_PROCESS; } static void atkbd_receive_byte(struct ps2dev *ps2dev, u8 data) { struct serio *serio = ps2dev->serio; struct atkbd *atkbd = container_of(ps2dev, struct atkbd, ps2dev); struct input_dev *dev = atkbd->dev; unsigned int code = data; int scroll = 0, hscroll = 0, click = -1; int value; unsigned short keycode; pm_wakeup_event(&serio->dev, 0); if (!atkbd->enabled) return; input_event(dev, EV_MSC, MSC_RAW, code); if (atkbd_platform_scancode_fixup) code = atkbd_platform_scancode_fixup(atkbd, code); if (atkbd->translated) { if (atkbd->emul || atkbd_need_xlate(atkbd->xl_bit, code)) { atkbd->release = code >> 7; code &= 0x7f; } if (!atkbd->emul) atkbd_calculate_xl_bit(atkbd, data); } switch (code) { case ATKBD_RET_BAT: atkbd->enabled = false; serio_reconnect(atkbd->ps2dev.serio); return; case ATKBD_RET_EMUL0: atkbd->emul = 1; return; case ATKBD_RET_EMUL1: atkbd->emul = 2; return; case ATKBD_RET_RELEASE: atkbd->release = true; return; case ATKBD_RET_ACK: case ATKBD_RET_NAK: if (printk_ratelimit()) dev_warn(&serio->dev, "Spurious %s on %s. " "Some program might be trying to access hardware directly.\n", data == ATKBD_RET_ACK ? "ACK" : "NAK", serio->phys); return; case ATKBD_RET_ERR: atkbd->err_count++; dev_dbg(&serio->dev, "Keyboard on %s reports too many keys pressed.\n", serio->phys); return; } code = atkbd_compat_scancode(atkbd, code); if (atkbd->emul && --atkbd->emul) return; keycode = atkbd->keycode[code]; if (!(atkbd->release && test_bit(code, atkbd->force_release_mask))) if (keycode != ATKBD_KEY_NULL) input_event(dev, EV_MSC, MSC_SCAN, code); switch (keycode) { case ATKBD_KEY_NULL: break; case ATKBD_KEY_UNKNOWN: dev_warn(&serio->dev, "Unknown key %s (%s set %d, code %#x on %s).\n", atkbd->release ? "released" : "pressed", atkbd->translated ? 
"translated" : "raw", atkbd->set, code, serio->phys); dev_warn(&serio->dev, "Use 'setkeycodes %s%02x <keycode>' to make it known.\n", code & 0x80 ? "e0" : "", code & 0x7f); input_sync(dev); break; case ATKBD_SCR_1: scroll = 1; break; case ATKBD_SCR_2: scroll = 2; break; case ATKBD_SCR_4: scroll = 4; break; case ATKBD_SCR_8: scroll = 8; break; case ATKBD_SCR_CLICK: click = !atkbd->release; break; case ATKBD_SCR_LEFT: hscroll = -1; break; case ATKBD_SCR_RIGHT: hscroll = 1; break; default: if (atkbd->release) { value = 0; atkbd->last = 0; } else if (!atkbd->softrepeat && test_bit(keycode, dev->key)) { /* Workaround Toshiba laptop multiple keypress */ value = time_before(jiffies, atkbd->time) && atkbd->last == code ? 1 : 2; } else { value = 1; atkbd->last = code; atkbd->time = jiffies + msecs_to_jiffies(dev->rep[REP_DELAY]) / 2; } input_event(dev, EV_KEY, keycode, value); input_sync(dev); if (value && test_bit(code, atkbd->force_release_mask)) { input_event(dev, EV_MSC, MSC_SCAN, code); input_report_key(dev, keycode, 0); input_sync(dev); } } if (atkbd->scroll) { if (click != -1) input_report_key(dev, BTN_MIDDLE, click); input_report_rel(dev, REL_WHEEL, atkbd->release ? -scroll : scroll); input_report_rel(dev, REL_HWHEEL, hscroll); input_sync(dev); } atkbd->release = false; } static int atkbd_set_repeat_rate(struct atkbd *atkbd) { const short period[32] = { 33, 37, 42, 46, 50, 54, 58, 63, 67, 75, 83, 92, 100, 109, 116, 125, 133, 149, 167, 182, 200, 217, 232, 250, 270, 303, 333, 370, 400, 435, 470, 500 }; const short delay[4] = { 250, 500, 750, 1000 }; struct input_dev *dev = atkbd->dev; unsigned char param; int i = 0, j = 0; while (i < ARRAY_SIZE(period) - 1 && period[i] < dev->rep[REP_PERIOD]) i++; dev->rep[REP_PERIOD] = period[i]; while (j < ARRAY_SIZE(delay) - 1 && delay[j] < dev->rep[REP_DELAY]) j++; dev->rep[REP_DELAY] = delay[j]; param = i | (j << 5); return ps2_command(&atkbd->ps2dev, &param, ATKBD_CMD_SETREP); } static int atkbd_set_leds(struct atkbd *atkbd) { struct input_dev *dev = atkbd->dev; unsigned char param[2]; param[0] = (test_bit(LED_SCROLLL, dev->led) ? 1 : 0) | (test_bit(LED_NUML, dev->led) ? 2 : 0) | (test_bit(LED_CAPSL, dev->led) ? 4 : 0); if (ps2_command(&atkbd->ps2dev, param, ATKBD_CMD_SETLEDS)) return -1; if (atkbd->extra) { param[0] = 0; param[1] = (test_bit(LED_COMPOSE, dev->led) ? 0x01 : 0) | (test_bit(LED_SLEEP, dev->led) ? 0x02 : 0) | (test_bit(LED_SUSPEND, dev->led) ? 0x04 : 0) | (test_bit(LED_MISC, dev->led) ? 0x10 : 0) | (test_bit(LED_MUTE, dev->led) ? 0x20 : 0); if (ps2_command(&atkbd->ps2dev, param, ATKBD_CMD_EX_SETLEDS)) return -1; } return 0; } /* * atkbd_event_work() is used to complete processing of events that * can not be processed by input_event() which is often called from * interrupt context. */ static void atkbd_event_work(struct work_struct *work) { struct atkbd *atkbd = container_of(work, struct atkbd, event_work.work); guard(mutex)(&atkbd->mutex); if (!atkbd->enabled) { /* * Serio ports are resumed asynchronously so while driver core * thinks that device is already fully operational in reality * it may not be ready yet. In this case we need to keep * rescheduling till reconnect completes. */ schedule_delayed_work(&atkbd->event_work, msecs_to_jiffies(100)); } else { if (test_and_clear_bit(ATKBD_LED_EVENT_BIT, &atkbd->event_mask)) atkbd_set_leds(atkbd); if (test_and_clear_bit(ATKBD_REP_EVENT_BIT, &atkbd->event_mask)) atkbd_set_repeat_rate(atkbd); } } /* * Schedule switch for execution. 
We need to throttle requests, * otherwise keyboard may become unresponsive. */ static void atkbd_schedule_event_work(struct atkbd *atkbd, int event_bit) { unsigned long delay = msecs_to_jiffies(50); if (time_after(jiffies, atkbd->event_jiffies + delay)) delay = 0; atkbd->event_jiffies = jiffies; set_bit(event_bit, &atkbd->event_mask); mb(); schedule_delayed_work(&atkbd->event_work, delay); } /* * Event callback from the input module. Events that change the state of * the hardware are processed here. If action can not be performed in * interrupt context it is offloaded to atkbd_event_work. */ static int atkbd_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { struct atkbd *atkbd = input_get_drvdata(dev); if (!atkbd->write) return -1; switch (type) { case EV_LED: atkbd_schedule_event_work(atkbd, ATKBD_LED_EVENT_BIT); return 0; case EV_REP: if (!atkbd->softrepeat) atkbd_schedule_event_work(atkbd, ATKBD_REP_EVENT_BIT); return 0; default: return -1; } } /* * atkbd_enable() signals that interrupt handler is allowed to * generate input events. */ static inline void atkbd_enable(struct atkbd *atkbd) { guard(serio_pause_rx)(atkbd->ps2dev.serio); atkbd->enabled = true; } /* * atkbd_disable() tells input handler that all incoming data except * for ACKs and command response should be dropped. */ static inline void atkbd_disable(struct atkbd *atkbd) { guard(serio_pause_rx)(atkbd->ps2dev.serio); atkbd->enabled = false; } static int atkbd_activate(struct atkbd *atkbd) { struct ps2dev *ps2dev = &atkbd->ps2dev; /* * Enable the keyboard to receive keystrokes. */ if (ps2_command(ps2dev, NULL, ATKBD_CMD_ENABLE)) { dev_err(&ps2dev->serio->dev, "Failed to enable keyboard on %s\n", ps2dev->serio->phys); return -1; } return 0; } /* * atkbd_deactivate() resets and disables the keyboard from sending * keystrokes. */ static void atkbd_deactivate(struct atkbd *atkbd) { struct ps2dev *ps2dev = &atkbd->ps2dev; if (ps2_command(ps2dev, NULL, ATKBD_CMD_RESET_DIS)) dev_err(&ps2dev->serio->dev, "Failed to deactivate keyboard on %s\n", ps2dev->serio->phys); } #ifdef CONFIG_X86 static bool atkbd_is_portable_device(void) { static const char * const chassis_types[] = { "8", /* Portable */ "9", /* Laptop */ "10", /* Notebook */ "14", /* Sub-Notebook */ "31", /* Convertible */ "32", /* Detachable */ }; int i; for (i = 0; i < ARRAY_SIZE(chassis_types); i++) if (dmi_match(DMI_CHASSIS_TYPE, chassis_types[i])) return true; return false; } /* * On many modern laptops ATKBD_CMD_GETID may cause problems, on these laptops * the controller is always in translated mode. In this mode mice/touchpads will * not work. So in this case simply assume a keyboard is connected to avoid * confusing some laptop keyboards. * * Skipping ATKBD_CMD_GETID ends up using a fake keyboard id. Using the standard * 0xab83 id is ok in translated mode, only atkbd_select_set() checks atkbd->id * and in translated mode that is a no-op. */ static bool atkbd_skip_getid(struct atkbd *atkbd) { return atkbd->translated && atkbd_is_portable_device(); } #else static inline bool atkbd_skip_getid(struct atkbd *atkbd) { return false; } #endif /* * atkbd_probe() probes for an AT keyboard on a serio port. */ static int atkbd_probe(struct atkbd *atkbd) { struct ps2dev *ps2dev = &atkbd->ps2dev; unsigned char param[2]; /* * Some systems, where the bit-twiddling when testing the io-lines of the * controller may confuse the keyboard need a full reset of the keyboard. On * these systems the BIOS also usually doesn't do it for us. 
*/ if (atkbd_reset) if (ps2_command(ps2dev, NULL, ATKBD_CMD_RESET_BAT)) dev_warn(&ps2dev->serio->dev, "keyboard reset failed on %s\n", ps2dev->serio->phys); if (atkbd_skip_getid(atkbd)) { atkbd->id = 0xab83; goto deactivate_kbd; } /* * Then we check the keyboard ID. We should get 0xab83 under normal conditions. * Some keyboards report different values, but the first byte is always 0xab or * 0xac. Some old AT keyboards don't report anything. If a mouse is connected, this * should make sure we don't try to set the LEDs on it. */ param[0] = param[1] = 0xa5; /* initialize with invalid values */ if (ps2_command(ps2dev, param, ATKBD_CMD_GETID)) { /* * If the get ID command failed, we check if we can at least set * the LEDs on the keyboard. This should work on every keyboard out there. * It also turns the LEDs off, which we want anyway. */ param[0] = 0; if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS)) return -1; atkbd->id = 0xabba; return 0; } if (!ps2_is_keyboard_id(param[0])) return -1; atkbd->id = (param[0] << 8) | param[1]; if (atkbd->id == 0xaca1 && atkbd->translated) { dev_err(&ps2dev->serio->dev, "NCD terminal keyboards are only supported on non-translating controllers. " "Use i8042.direct=1 to disable translation.\n"); return -1; } deactivate_kbd: /* * Make sure nothing is coming from the keyboard and disturbs our * internal state. */ if (!atkbd_skip_deactivate) atkbd_deactivate(atkbd); return 0; } /* * atkbd_select_set checks if a keyboard has a working Set 3 support, and * sets it into that. Unfortunately there are keyboards that can be switched * to Set 3, but don't work well in that (BTC Multimedia ...) */ static int atkbd_select_set(struct atkbd *atkbd, int target_set, int allow_extra) { struct ps2dev *ps2dev = &atkbd->ps2dev; unsigned char param[2]; atkbd->extra = false; /* * For known special keyboards we can go ahead and set the correct set. * We check for NCD PS/2 Sun, NorthGate OmniKey 101 and * IBM RapidAccess / IBM EzButton / Chicony KBP-8993 keyboards. */ if (atkbd->translated) return 2; if (atkbd->id == 0xaca1) { param[0] = 3; ps2_command(ps2dev, param, ATKBD_CMD_SSCANSET); return 3; } if (allow_extra) { param[0] = 0x71; if (!ps2_command(ps2dev, param, ATKBD_CMD_EX_ENABLE)) { atkbd->extra = true; return 2; } } if (atkbd_terminal) { ps2_command(ps2dev, param, ATKBD_CMD_SETALL_MB); return 3; } if (target_set != 3) return 2; if (!ps2_command(ps2dev, param, ATKBD_CMD_OK_GETID)) { atkbd->id = param[0] << 8 | param[1]; return 2; } param[0] = 3; if (ps2_command(ps2dev, param, ATKBD_CMD_SSCANSET)) return 2; param[0] = 0; if (ps2_command(ps2dev, param, ATKBD_CMD_GSCANSET)) return 2; if (param[0] != 3) { param[0] = 2; if (ps2_command(ps2dev, param, ATKBD_CMD_SSCANSET)) return 2; } ps2_command(ps2dev, param, ATKBD_CMD_SETALL_MBR); return 3; } static int atkbd_reset_state(struct atkbd *atkbd) { struct ps2dev *ps2dev = &atkbd->ps2dev; unsigned char param[1]; /* * Set the LEDs to a predefined state (all off). */ param[0] = 0; if (ps2_command(ps2dev, param, ATKBD_CMD_SETLEDS)) return -1; /* * Set autorepeat to fastest possible. */ param[0] = 0; if (ps2_command(ps2dev, param, ATKBD_CMD_SETREP)) return -1; return 0; } /* * atkbd_cleanup() restores the keyboard state so that BIOS is happy after a * reboot. */ static void atkbd_cleanup(struct serio *serio) { struct atkbd *atkbd = atkbd_from_serio(serio); atkbd_disable(atkbd); ps2_command(&atkbd->ps2dev, NULL, ATKBD_CMD_RESET_DEF); } /* * atkbd_disconnect() closes and frees. 
*/ static void atkbd_disconnect(struct serio *serio) { struct atkbd *atkbd = atkbd_from_serio(serio); atkbd_disable(atkbd); input_unregister_device(atkbd->dev); /* * Make sure we don't have a command in flight. * Note that since atkbd->enabled is false event work will keep * rescheduling itself until it gets canceled and will not try * accessing freed input device or serio port. */ cancel_delayed_work_sync(&atkbd->event_work); serio_close(serio); serio_set_drvdata(serio, NULL); kfree(atkbd); } /* * generate release events for the keycodes given in data */ static void atkbd_apply_forced_release_keylist(struct atkbd* atkbd, const void *data) { const unsigned int *keys = data; unsigned int i; if (atkbd->set == 2) for (i = 0; keys[i] != -1U; i++) __set_bit(keys[i], atkbd->force_release_mask); } /* * Most special keys (Fn+F?) on Dell laptops do not generate release * events so we have to do it ourselves. */ static unsigned int atkbd_dell_laptop_forced_release_keys[] = { 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8f, 0x93, -1U }; /* * Perform fixup for HP system that doesn't generate release * for its video switch */ static unsigned int atkbd_hp_forced_release_keys[] = { 0x94, -1U }; /* * Samsung NC10,NC20 with Fn+F? key release not working */ static unsigned int atkbd_samsung_forced_release_keys[] = { 0x82, 0x83, 0x84, 0x86, 0x88, 0x89, 0xb3, 0xf7, 0xf9, -1U }; /* * Amilo Pi 3525 key release for Fn+Volume keys not working */ static unsigned int atkbd_amilo_pi3525_forced_release_keys[] = { 0x20, 0xa0, 0x2e, 0xae, 0x30, 0xb0, -1U }; /* * Amilo Xi 3650 key release for light touch bar not working */ static unsigned int atkbd_amilo_xi3650_forced_release_keys[] = { 0x67, 0xed, 0x90, 0xa2, 0x99, 0xa4, 0xae, 0xb0, -1U }; /* * Soltech TA12 system with broken key release on volume keys and mute key */ static unsigned int atkdb_soltech_ta12_forced_release_keys[] = { 0xa0, 0xae, 0xb0, -1U }; /* * Many notebooks don't send key release event for volume up/down * keys, with key list below common among them */ static unsigned int atkbd_volume_forced_release_keys[] = { 0xae, 0xb0, -1U }; /* * OQO 01+ multimedia keys (64--66) generate e0 6x upon release whereas * they should be generating e4-e6 (0x80 | code). 
*/ static unsigned int atkbd_oqo_01plus_scancode_fixup(struct atkbd *atkbd, unsigned int code) { if (atkbd->translated && atkbd->emul == 1 && (code == 0x64 || code == 0x65 || code == 0x66)) { atkbd->emul = 0; code |= 0x80; } return code; } static int atkbd_get_keymap_from_fwnode(struct atkbd *atkbd) { struct device *dev = &atkbd->ps2dev.serio->dev; int i, n; u32 *ptr; u16 scancode, keycode; /* Parse "linux,keymap" property */ n = device_property_count_u32(dev, "linux,keymap"); if (n <= 0 || n > ATKBD_KEYMAP_SIZE) return -ENXIO; ptr = kcalloc(n, sizeof(u32), GFP_KERNEL); if (!ptr) return -ENOMEM; if (device_property_read_u32_array(dev, "linux,keymap", ptr, n)) { dev_err(dev, "problem parsing FW keymap property\n"); kfree(ptr); return -EINVAL; } memset(atkbd->keycode, 0, sizeof(atkbd->keycode)); for (i = 0; i < n; i++) { scancode = SCANCODE(ptr[i]); keycode = KEYCODE(ptr[i]); atkbd->keycode[scancode] = keycode; } kfree(ptr); return 0; } /* * atkbd_set_keycode_table() initializes keyboard's keycode table * according to the selected scancode set */ static void atkbd_set_keycode_table(struct atkbd *atkbd) { struct device *dev = &atkbd->ps2dev.serio->dev; unsigned int scancode; int i, j; memset(atkbd->keycode, 0, sizeof(atkbd->keycode)); bitmap_zero(atkbd->force_release_mask, ATKBD_KEYMAP_SIZE); if (!atkbd_get_keymap_from_fwnode(atkbd)) { dev_dbg(dev, "Using FW keymap\n"); } else if (atkbd->translated) { for (i = 0; i < 128; i++) { scancode = atkbd_unxlate_table[i]; atkbd->keycode[i] = atkbd_set2_keycode[scancode]; atkbd->keycode[i | 0x80] = atkbd_set2_keycode[scancode | 0x80]; if (atkbd->scroll) for (j = 0; j < ARRAY_SIZE(atkbd_scroll_keys); j++) if ((scancode | 0x80) == atkbd_scroll_keys[j].set2) atkbd->keycode[i | 0x80] = atkbd_scroll_keys[j].keycode; } } else if (atkbd->set == 3) { memcpy(atkbd->keycode, atkbd_set3_keycode, sizeof(atkbd->keycode)); } else { memcpy(atkbd->keycode, atkbd_set2_keycode, sizeof(atkbd->keycode)); if (atkbd->scroll) for (i = 0; i < ARRAY_SIZE(atkbd_scroll_keys); i++) { scancode = atkbd_scroll_keys[i].set2; atkbd->keycode[scancode] = atkbd_scroll_keys[i].keycode; } } /* * HANGEUL and HANJA keys do not send release events so we need to * generate such events ourselves */ scancode = atkbd_compat_scancode(atkbd, ATKBD_RET_HANGEUL); atkbd->keycode[scancode] = KEY_HANGEUL; __set_bit(scancode, atkbd->force_release_mask); scancode = atkbd_compat_scancode(atkbd, ATKBD_RET_HANJA); atkbd->keycode[scancode] = KEY_HANJA; __set_bit(scancode, atkbd->force_release_mask); /* * Perform additional fixups */ if (atkbd_platform_fixup) atkbd_platform_fixup(atkbd, atkbd_platform_fixup_data); } /* * atkbd_set_device_attrs() sets up keyboard's input device structure */ static void atkbd_set_device_attrs(struct atkbd *atkbd) { struct input_dev *input_dev = atkbd->dev; int i; if (atkbd->extra) snprintf(atkbd->name, sizeof(atkbd->name), "AT Set 2 Extra keyboard"); else snprintf(atkbd->name, sizeof(atkbd->name), "AT %s Set %d keyboard", atkbd->translated ? "Translated" : "Raw", atkbd->set); scnprintf(atkbd->phys, sizeof(atkbd->phys), "%s/input0", atkbd->ps2dev.serio->phys); input_dev->name = atkbd->name; input_dev->phys = atkbd->phys; input_dev->id.bustype = BUS_I8042; input_dev->id.vendor = 0x0001; input_dev->id.product = atkbd->translated ? 
1 : atkbd->set; input_dev->id.version = atkbd->id; input_dev->event = atkbd_event; input_dev->dev.parent = &atkbd->ps2dev.serio->dev; input_set_drvdata(input_dev, atkbd); input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP) | BIT_MASK(EV_MSC); if (atkbd->write) { input_dev->evbit[0] |= BIT_MASK(EV_LED); input_dev->ledbit[0] = BIT_MASK(LED_NUML) | BIT_MASK(LED_CAPSL) | BIT_MASK(LED_SCROLLL); } if (atkbd->extra) input_dev->ledbit[0] |= BIT_MASK(LED_COMPOSE) | BIT_MASK(LED_SUSPEND) | BIT_MASK(LED_SLEEP) | BIT_MASK(LED_MUTE) | BIT_MASK(LED_MISC); if (!atkbd->softrepeat) { input_dev->rep[REP_DELAY] = 250; input_dev->rep[REP_PERIOD] = 33; } input_dev->mscbit[0] = atkbd->softraw ? BIT_MASK(MSC_SCAN) : BIT_MASK(MSC_RAW) | BIT_MASK(MSC_SCAN); if (atkbd->scroll) { input_dev->evbit[0] |= BIT_MASK(EV_REL); input_dev->relbit[0] = BIT_MASK(REL_WHEEL) | BIT_MASK(REL_HWHEEL); __set_bit(BTN_MIDDLE, input_dev->keybit); } input_dev->keycode = atkbd->keycode; input_dev->keycodesize = sizeof(unsigned short); input_dev->keycodemax = ARRAY_SIZE(atkbd_set2_keycode); for (i = 0; i < ATKBD_KEYMAP_SIZE; i++) { if (atkbd->keycode[i] != KEY_RESERVED && atkbd->keycode[i] != ATKBD_KEY_NULL && atkbd->keycode[i] < ATKBD_SPECIAL) { __set_bit(atkbd->keycode[i], input_dev->keybit); } } } static void atkbd_parse_fwnode_data(struct serio *serio) { struct atkbd *atkbd = atkbd_from_serio(serio); struct device *dev = &serio->dev; int n; /* Parse "function-row-physmap" property */ n = device_property_count_u32(dev, "function-row-physmap"); if (n > 0 && n <= VIVALDI_MAX_FUNCTION_ROW_KEYS && !device_property_read_u32_array(dev, "function-row-physmap", atkbd->vdata.function_row_physmap, n)) { atkbd->vdata.num_function_row_keys = n; dev_dbg(dev, "FW reported %d function-row key locations\n", n); } } /* * atkbd_connect() is called when the serio module finds an interface * that isn't handled yet by an appropriate device driver. We check if * there is an AT keyboard out there and if yes, we register ourselves * to the input module. */ static int atkbd_connect(struct serio *serio, struct serio_driver *drv) { struct atkbd *atkbd; struct input_dev *dev; int err = -ENOMEM; atkbd = kzalloc(sizeof(*atkbd), GFP_KERNEL); dev = input_allocate_device(); if (!atkbd || !dev) goto fail1; atkbd->dev = dev; ps2_init(&atkbd->ps2dev, serio, atkbd_pre_receive_byte, atkbd_receive_byte); INIT_DELAYED_WORK(&atkbd->event_work, atkbd_event_work); mutex_init(&atkbd->mutex); switch (serio->id.type) { case SERIO_8042_XL: atkbd->translated = true; fallthrough; case SERIO_8042: if (serio->write) atkbd->write = true; break; } atkbd->softraw = atkbd_softraw; atkbd->softrepeat = atkbd_softrepeat; atkbd->scroll = atkbd_scroll; if (atkbd->softrepeat) atkbd->softraw = true; serio_set_drvdata(serio, atkbd); err = serio_open(serio, drv); if (err) goto fail2; if (atkbd->write) { if (atkbd_probe(atkbd)) { err = -ENODEV; goto fail3; } atkbd->set = atkbd_select_set(atkbd, atkbd_set, atkbd_extra); atkbd_reset_state(atkbd); } else { atkbd->set = 2; atkbd->id = 0xab00; } atkbd_parse_fwnode_data(serio); atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); atkbd_enable(atkbd); if (serio->write) atkbd_activate(atkbd); err = input_register_device(atkbd->dev); if (err) goto fail3; return 0; fail3: serio_close(serio); fail2: serio_set_drvdata(serio, NULL); fail1: input_free_device(dev); kfree(atkbd); return err; } /* * atkbd_reconnect() tries to restore keyboard into a sane state and is * most likely called on resume. 
*/ static int atkbd_reconnect(struct serio *serio) { struct atkbd *atkbd = atkbd_from_serio(serio); struct serio_driver *drv = serio->drv; int error; if (!atkbd || !drv) { dev_dbg(&serio->dev, "reconnect request, but serio is disconnected, ignoring...\n"); return -1; } guard(mutex)(&atkbd->mutex); atkbd_disable(atkbd); if (atkbd->write) { error = atkbd_probe(atkbd); if (error) return error; if (atkbd->set != atkbd_select_set(atkbd, atkbd->set, atkbd->extra)) return -EIO; /* * Restore LED state and repeat rate. While input core * will do this for us at resume time reconnect may happen * because user requested it via sysfs or simply because * keyboard was unplugged and plugged in again so we need * to do it ourselves here. */ atkbd_set_leds(atkbd); if (!atkbd->softrepeat) atkbd_set_repeat_rate(atkbd); } /* * Reset our state machine in case reconnect happened in the middle * of multi-byte scancode. */ atkbd->xl_bit = 0; atkbd->emul = 0; atkbd_enable(atkbd); if (atkbd->write) atkbd_activate(atkbd); return 0; } static const struct serio_device_id atkbd_serio_ids[] = { { .type = SERIO_8042, .proto = SERIO_ANY, .id = SERIO_ANY, .extra = SERIO_ANY, }, { .type = SERIO_8042_XL, .proto = SERIO_ANY, .id = SERIO_ANY, .extra = SERIO_ANY, }, { .type = SERIO_RS232, .proto = SERIO_PS2SER, .id = SERIO_ANY, .extra = SERIO_ANY, }, { 0 } }; MODULE_DEVICE_TABLE(serio, atkbd_serio_ids); static struct serio_driver atkbd_drv = { .driver = { .name = "atkbd", .dev_groups = atkbd_attribute_groups, }, .description = DRIVER_DESC, .id_table = atkbd_serio_ids, .interrupt = ps2_interrupt, .connect = atkbd_connect, .reconnect = atkbd_reconnect, .disconnect = atkbd_disconnect, .cleanup = atkbd_cleanup, }; static ssize_t atkbd_attr_show_helper(struct device *dev, char *buf, ssize_t (*handler)(struct atkbd *, char *)) { struct serio *serio = to_serio_port(dev); struct atkbd *atkbd = atkbd_from_serio(serio); return handler(atkbd, buf); } static ssize_t atkbd_attr_set_helper(struct device *dev, const char *buf, size_t count, ssize_t (*handler)(struct atkbd *, const char *, size_t)) { struct serio *serio = to_serio_port(dev); struct atkbd *atkbd = atkbd_from_serio(serio); int retval; scoped_guard(mutex_intr, &atkbd->mutex) { atkbd_disable(atkbd); retval = handler(atkbd, buf, count); atkbd_enable(atkbd); return retval; } return -EINTR; } static ssize_t atkbd_show_extra(struct atkbd *atkbd, char *buf) { return sprintf(buf, "%d\n", atkbd->extra ? 1 : 0); } static ssize_t atkbd_set_extra(struct atkbd *atkbd, const char *buf, size_t count) { struct input_dev *old_dev, *new_dev; unsigned int value; int err; bool old_extra; unsigned char old_set; if (!atkbd->write) return -EIO; err = kstrtouint(buf, 10, &value); if (err) return err; if (value > 1) return -EINVAL; if (atkbd->extra != value) { /* * Since device's properties will change we need to * unregister old device. But allocate and register * new one first to make sure we have it. 
*/ old_dev = atkbd->dev; old_extra = atkbd->extra; old_set = atkbd->set; new_dev = input_allocate_device(); if (!new_dev) return -ENOMEM; atkbd->dev = new_dev; atkbd->set = atkbd_select_set(atkbd, atkbd->set, value); atkbd_reset_state(atkbd); atkbd_activate(atkbd); atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); err = input_register_device(atkbd->dev); if (err) { input_free_device(new_dev); atkbd->dev = old_dev; atkbd->set = atkbd_select_set(atkbd, old_set, old_extra); atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); return err; } input_unregister_device(old_dev); } return count; } static ssize_t atkbd_show_force_release(struct atkbd *atkbd, char *buf) { size_t len = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", ATKBD_KEYMAP_SIZE, atkbd->force_release_mask); buf[len++] = '\n'; buf[len] = '\0'; return len; } static ssize_t atkbd_set_force_release(struct atkbd *atkbd, const char *buf, size_t count) { /* 64 bytes on stack should be acceptable */ DECLARE_BITMAP(new_mask, ATKBD_KEYMAP_SIZE); int err; err = bitmap_parselist(buf, new_mask, ATKBD_KEYMAP_SIZE); if (err) return err; memcpy(atkbd->force_release_mask, new_mask, sizeof(atkbd->force_release_mask)); return count; } static ssize_t atkbd_show_scroll(struct atkbd *atkbd, char *buf) { return sprintf(buf, "%d\n", atkbd->scroll ? 1 : 0); } static ssize_t atkbd_set_scroll(struct atkbd *atkbd, const char *buf, size_t count) { struct input_dev *old_dev, *new_dev; unsigned int value; int err; bool old_scroll; err = kstrtouint(buf, 10, &value); if (err) return err; if (value > 1) return -EINVAL; if (atkbd->scroll != value) { old_dev = atkbd->dev; old_scroll = atkbd->scroll; new_dev = input_allocate_device(); if (!new_dev) return -ENOMEM; atkbd->dev = new_dev; atkbd->scroll = value; atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); err = input_register_device(atkbd->dev); if (err) { input_free_device(new_dev); atkbd->scroll = old_scroll; atkbd->dev = old_dev; atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); return err; } input_unregister_device(old_dev); } return count; } static ssize_t atkbd_show_set(struct atkbd *atkbd, char *buf) { return sprintf(buf, "%d\n", atkbd->set); } static ssize_t atkbd_set_set(struct atkbd *atkbd, const char *buf, size_t count) { struct input_dev *old_dev, *new_dev; unsigned int value; int err; unsigned char old_set; bool old_extra; if (!atkbd->write) return -EIO; err = kstrtouint(buf, 10, &value); if (err) return err; if (value != 2 && value != 3) return -EINVAL; if (atkbd->set != value) { old_dev = atkbd->dev; old_extra = atkbd->extra; old_set = atkbd->set; new_dev = input_allocate_device(); if (!new_dev) return -ENOMEM; atkbd->dev = new_dev; atkbd->set = atkbd_select_set(atkbd, value, atkbd->extra); atkbd_reset_state(atkbd); atkbd_activate(atkbd); atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); err = input_register_device(atkbd->dev); if (err) { input_free_device(new_dev); atkbd->dev = old_dev; atkbd->set = atkbd_select_set(atkbd, old_set, old_extra); atkbd_set_keycode_table(atkbd); atkbd_set_device_attrs(atkbd); return err; } input_unregister_device(old_dev); } return count; } static ssize_t atkbd_show_softrepeat(struct atkbd *atkbd, char *buf) { return sprintf(buf, "%d\n", atkbd->softrepeat ? 
1 : 0); } static ssize_t atkbd_set_softrepeat(struct atkbd *atkbd, const char *buf, size_t count) { struct input_dev *old_dev, *new_dev; unsigned int value; int err; bool old_softrepeat, old_softraw; if (!atkbd->write) return -EIO; err = kstrtouint(buf, 10, &value); if (err) return err; if (value > 1) return -EINVAL; if (atkbd->softrepeat != value) { old_dev = atkbd->dev; old_softrepeat = atkbd->softrepeat; old_softraw = atkbd->softraw; new_dev = input_allocate_device(); if (!new_dev) return -ENOMEM; atkbd->dev = new_dev; atkbd->softrepeat = value; if (atkbd->softrepeat) atkbd->softraw = true; atkbd_set_device_attrs(atkbd); err = input_register_device(atkbd->dev); if (err) { input_free_device(new_dev); atkbd->dev = old_dev; atkbd->softrepeat = old_softrepeat; atkbd->softraw = old_softraw; atkbd_set_device_attrs(atkbd); return err; } input_unregister_device(old_dev); } return count; } static ssize_t atkbd_show_softraw(struct atkbd *atkbd, char *buf) { return sprintf(buf, "%d\n", atkbd->softraw ? 1 : 0); } static ssize_t atkbd_set_softraw(struct atkbd *atkbd, const char *buf, size_t count) { struct input_dev *old_dev, *new_dev; unsigned int value; int err; bool old_softraw; err = kstrtouint(buf, 10, &value); if (err) return err; if (value > 1) return -EINVAL; if (atkbd->softraw != value) { old_dev = atkbd->dev; old_softraw = atkbd->softraw; new_dev = input_allocate_device(); if (!new_dev) return -ENOMEM; atkbd->dev = new_dev; atkbd->softraw = value; atkbd_set_device_attrs(atkbd); err = input_register_device(atkbd->dev); if (err) { input_free_device(new_dev); atkbd->dev = old_dev; atkbd->softraw = old_softraw; atkbd_set_device_attrs(atkbd); return err; } input_unregister_device(old_dev); } return count; } static ssize_t atkbd_show_err_count(struct atkbd *atkbd, char *buf) { return sprintf(buf, "%lu\n", atkbd->err_count); } static int __init atkbd_setup_forced_release(const struct dmi_system_id *id) { atkbd_platform_fixup = atkbd_apply_forced_release_keylist; atkbd_platform_fixup_data = id->driver_data; return 1; } static int __init atkbd_setup_scancode_fixup(const struct dmi_system_id *id) { atkbd_platform_scancode_fixup = id->driver_data; return 1; } static int __init atkbd_deactivate_fixup(const struct dmi_system_id *id) { atkbd_skip_deactivate = true; return 1; } /* * NOTE: do not add any more "force release" quirks to this table. The * task of adjusting list of keys that should be "released" automatically * by the driver is now delegated to userspace tools, such as udev, so * submit such quirks there. 
*/ static const struct dmi_system_id atkbd_dmi_quirk_table[] __initconst = { { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_CHASSIS_TYPE, "8"), /* Portable */ }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_dell_laptop_forced_release_keys, }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), DMI_MATCH(DMI_CHASSIS_TYPE, "8"), /* Portable */ }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_dell_laptop_forced_release_keys, }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), DMI_MATCH(DMI_PRODUCT_NAME, "HP 2133"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_hp_forced_release_keys, }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), DMI_MATCH(DMI_PRODUCT_NAME, "Pavilion ZV6100"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_volume_forced_release_keys, }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4000"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_volume_forced_release_keys, }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4100"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_volume_forced_release_keys, }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), DMI_MATCH(DMI_PRODUCT_NAME, "Presario R4200"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_volume_forced_release_keys, }, { /* Inventec Symphony */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "INVENTEC"), DMI_MATCH(DMI_PRODUCT_NAME, "SYMPHONY 6.0/7.0"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_volume_forced_release_keys, }, { /* Samsung NC10 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), DMI_MATCH(DMI_PRODUCT_NAME, "NC10"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_samsung_forced_release_keys, }, { /* Samsung NC20 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), DMI_MATCH(DMI_PRODUCT_NAME, "NC20"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_samsung_forced_release_keys, }, { /* Samsung SQ45S70S */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), DMI_MATCH(DMI_PRODUCT_NAME, "SQ45S70S"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_samsung_forced_release_keys, }, { /* Fujitsu Amilo PA 1510 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Pa 1510"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_volume_forced_release_keys, }, { /* Fujitsu Amilo Pi 3525 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Pi 3525"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_amilo_pi3525_forced_release_keys, }, { /* Fujitsu Amilo Xi 3650 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), DMI_MATCH(DMI_PRODUCT_NAME, "AMILO Xi 3650"), }, .callback = atkbd_setup_forced_release, .driver_data = atkbd_amilo_xi3650_forced_release_keys, }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Soltech Corporation"), DMI_MATCH(DMI_PRODUCT_NAME, "TA12"), }, .callback = atkbd_setup_forced_release, .driver_data = atkdb_soltech_ta12_forced_release_keys, }, { /* OQO Model 01+ */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "OQO"), DMI_MATCH(DMI_PRODUCT_NAME, "ZEPTO"), }, .callback = atkbd_setup_scancode_fixup, .driver_data = atkbd_oqo_01plus_scancode_fixup, }, { .matches = { 
DMI_MATCH(DMI_SYS_VENDOR, "LG Electronics"), }, .callback = atkbd_deactivate_fixup, }, { } }; static int __init atkbd_init(void) { dmi_check_system(atkbd_dmi_quirk_table); return serio_register_driver(&atkbd_drv); } static void __exit atkbd_exit(void) { serio_unregister_driver(&atkbd_drv); } module_init(atkbd_init); module_exit(atkbd_exit);
879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/list.h> #include <linux/log2.h> #include <linux/jhash.h> #include <linux/netlink.h> #include <linux/workqueue.h> #include <linux/rhashtable.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> /* We target a hash table size of 4, element hint is 75% of final size */ #define NFT_RHASH_ELEMENT_HINT 3 struct nft_rhash { struct rhashtable ht; struct delayed_work gc_work; u32 wq_gc_seq; }; struct nft_rhash_elem { struct nft_elem_priv priv; struct rhash_head node; struct llist_node walk_node; u32 wq_gc_seq; struct nft_set_ext ext; }; struct nft_rhash_cmp_arg { const struct nft_set *set; const u32 *key; u8 genmask; u64 tstamp; }; static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed) { const struct nft_rhash_cmp_arg *arg = data; return jhash(arg->key, len, seed); } static inline u32 nft_rhash_obj(const void *data, u32 len, u32 seed) { const struct nft_rhash_elem *he = data; return jhash(nft_set_ext_key(&he->ext), len, seed); } static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { const struct nft_rhash_cmp_arg *x = arg->key; const struct nft_rhash_elem *he = ptr; if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) return 1; if (nft_set_elem_is_dead(&he->ext)) return 1; if (__nft_set_elem_expired(&he->ext, x->tstamp)) return 1; if (!nft_set_elem_active(&he->ext, x->genmask)) return 1; return 0; } static const struct rhashtable_params nft_rhash_params = { .head_offset = offsetof(struct nft_rhash_elem, node), .hashfn = nft_rhash_key, .obj_hashfn = nft_rhash_obj, .obj_cmpfn = nft_rhash_cmp, .automatic_shrinking = true, }; INDIRECT_CALLABLE_SCOPE const struct nft_set_ext * nft_rhash_lookup(const struct net *net, const struct nft_set *set, const u32 *key) { struct nft_rhash *priv = nft_set_priv(set); const struct nft_rhash_elem *he; struct nft_rhash_cmp_arg arg = { .genmask = nft_genmask_cur(net), .set = set, .key = key, .tstamp = get_jiffies_64(), }; he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); if (he != NULL) return &he->ext; return NULL; } static struct nft_elem_priv * nft_rhash_get(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags) { struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_elem *he; struct nft_rhash_cmp_arg arg = { .genmask = nft_genmask_cur(net), .set = set, .key = elem->key.val.data, .tstamp = get_jiffies_64(), }; he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); if (he != NULL) return &he->priv; return ERR_PTR(-ENOENT); } static const struct nft_set_ext * nft_rhash_update(struct nft_set *set, const u32 *key, const struct nft_expr *expr, struct nft_regs *regs) { struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_elem *he, *prev; struct nft_elem_priv *elem_priv; struct nft_rhash_cmp_arg arg = { .genmask = NFT_GENMASK_ANY, .set = set, .key = key, .tstamp = get_jiffies_64(), }; he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); if (he != NULL) goto out; elem_priv = nft_dynset_new(set, expr, regs); if (!elem_priv) goto err1; he = nft_elem_priv_cast(elem_priv); init_llist_node(&he->walk_node); prev = 
rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, nft_rhash_params); if (IS_ERR(prev)) goto err2; /* Another cpu may race to insert the element with the same key */ if (prev) { nft_set_elem_destroy(set, &he->priv, true); atomic_dec(&set->nelems); he = prev; } out: return &he->ext; err2: nft_set_elem_destroy(set, &he->priv, true); atomic_dec(&set->nelems); err1: return NULL; } static int nft_rhash_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_elem_priv **elem_priv) { struct nft_rhash_elem *he = nft_elem_priv_cast(elem->priv); struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_cmp_arg arg = { .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, .tstamp = nft_net_tstamp(net), }; struct nft_rhash_elem *prev; init_llist_node(&he->walk_node); prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, nft_rhash_params); if (IS_ERR(prev)) return PTR_ERR(prev); if (prev) { *elem_priv = &prev->priv; return -EEXIST; } return 0; } static void nft_rhash_activate(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) { struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv); nft_clear(net, &he->ext); } static void nft_rhash_flush(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) { struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &he->ext); } static struct nft_elem_priv * nft_rhash_deactivate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_elem *he; struct nft_rhash_cmp_arg arg = { .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, .tstamp = nft_net_tstamp(net), }; rcu_read_lock(); he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); if (he) nft_set_elem_change_active(net, set, &he->ext); rcu_read_unlock(); return &he->priv; } static void nft_rhash_remove(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) { struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv); struct nft_rhash *priv = nft_set_priv(set); rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); } static bool nft_rhash_delete(const struct nft_set *set, const u32 *key) { struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_cmp_arg arg = { .genmask = NFT_GENMASK_ANY, .set = set, .key = key, }; struct nft_rhash_elem *he; he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); if (he == NULL) return false; nft_set_elem_dead(&he->ext); return true; } static void nft_rhash_walk_ro(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter) { struct nft_rhash *priv = nft_set_priv(set); struct rhashtable_iter hti; struct nft_rhash_elem *he; rhashtable_walk_enter(&priv->ht, &hti); rhashtable_walk_start(&hti); while ((he = rhashtable_walk_next(&hti))) { if (IS_ERR(he)) { if (PTR_ERR(he) != -EAGAIN) { iter->err = PTR_ERR(he); break; } continue; } if (iter->count < iter->skip) goto cont; iter->err = iter->fn(ctx, set, iter, &he->priv); if (iter->err < 0) break; cont: iter->count++; } rhashtable_walk_stop(&hti); rhashtable_walk_exit(&hti); } static void nft_rhash_walk_update(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter) { struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_elem *he, *tmp; struct llist_node *first_node; struct rhashtable_iter hti; LLIST_HEAD(walk_list); 
lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex); if (set->in_update_walk) { /* This can happen with bogus rulesets during ruleset validation * when a verdict map causes a jump back to the same map. * * Without this extra check the walk_next loop below will see * elems on the callers walk_list and skip (not validate) them. */ iter->err = -EMLINK; return; } /* walk happens under RCU. * * We create a snapshot list so ->iter callback can sleep. * commit_mutex is held, elements can ... * .. be added in parallel from dataplane (dynset) * .. be marked as dead in parallel from dataplane (dynset). * .. be queued for removal in parallel (gc timeout). * .. not be freed: transaction mutex is held. */ rhashtable_walk_enter(&priv->ht, &hti); rhashtable_walk_start(&hti); while ((he = rhashtable_walk_next(&hti))) { if (IS_ERR(he)) { if (PTR_ERR(he) != -EAGAIN) { iter->err = PTR_ERR(he); break; } continue; } /* rhashtable resized during walk, skip */ if (llist_on_list(&he->walk_node)) continue; llist_add(&he->walk_node, &walk_list); } rhashtable_walk_stop(&hti); rhashtable_walk_exit(&hti); first_node = __llist_del_all(&walk_list); set->in_update_walk = true; llist_for_each_entry_safe(he, tmp, first_node, walk_node) { if (iter->err == 0) { iter->err = iter->fn(ctx, set, iter, &he->priv); if (iter->err == 0) iter->count++; } /* all entries must be cleared again, else next ->walk iteration * will skip entries. */ init_llist_node(&he->walk_node); } set->in_update_walk = false; } static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter) { switch (iter->type) { case NFT_ITER_UPDATE: /* only relevant for netlink dumps which use READ type */ WARN_ON_ONCE(iter->skip != 0); nft_rhash_walk_update(ctx, set, iter); break; case NFT_ITER_READ: nft_rhash_walk_ro(ctx, set, iter); break; default: iter->err = -EINVAL; WARN_ON_ONCE(1); break; } } static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set, struct nft_set_ext *ext) { struct nft_set_elem_expr *elem_expr = nft_set_ext_expr(ext); struct nft_expr *expr; u32 size; nft_setelem_expr_foreach(expr, elem_expr, size) { if (expr->ops->gc && expr->ops->gc(read_pnet(&set->net), expr) && set->flags & NFT_SET_EVAL) return true; } return false; } static void nft_rhash_gc(struct work_struct *work) { struct nftables_pernet *nft_net; struct nft_set *set; struct nft_rhash_elem *he; struct nft_rhash *priv; struct rhashtable_iter hti; struct nft_trans_gc *gc; struct net *net; u32 gc_seq; priv = container_of(work, struct nft_rhash, gc_work.work); set = nft_set_container_of(priv); net = read_pnet(&set->net); nft_net = nft_pernet(net); gc_seq = READ_ONCE(nft_net->gc_seq); if (nft_set_gc_is_pending(set)) goto done; gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL); if (!gc) goto done; /* Elements never collected use a zero gc worker sequence number. */ if (unlikely(++priv->wq_gc_seq == 0)) priv->wq_gc_seq++; rhashtable_walk_enter(&priv->ht, &hti); rhashtable_walk_start(&hti); while ((he = rhashtable_walk_next(&hti))) { if (IS_ERR(he)) { nft_trans_gc_destroy(gc); gc = NULL; goto try_later; } /* Ruleset has been updated, try later. */ if (READ_ONCE(nft_net->gc_seq) != gc_seq) { nft_trans_gc_destroy(gc); gc = NULL; goto try_later; } /* rhashtable walk is unstable, already seen in this gc run? * Then, skip this element. In case of (unlikely) sequence * wraparound and stale element wq_gc_seq, next gc run will * just find this expired element. 
*/ if (he->wq_gc_seq == priv->wq_gc_seq) continue; if (nft_set_elem_is_dead(&he->ext)) goto dead_elem; if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPRESSIONS) && nft_rhash_expr_needs_gc_run(set, &he->ext)) goto needs_gc_run; if (!nft_set_elem_expired(&he->ext)) continue; needs_gc_run: nft_set_elem_dead(&he->ext); dead_elem: gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC); if (!gc) goto try_later; /* annotate gc sequence for this attempt. */ he->wq_gc_seq = priv->wq_gc_seq; nft_trans_gc_elem_add(gc, he); } gc = nft_trans_gc_catchall_async(gc, gc_seq); try_later: /* catchall list iteration requires rcu read side lock. */ rhashtable_walk_stop(&hti); rhashtable_walk_exit(&hti); if (gc) nft_trans_gc_queue_async_done(gc); done: queue_delayed_work(system_power_efficient_wq, &priv->gc_work, nft_set_gc_interval(set)); } static u64 nft_rhash_privsize(const struct nlattr * const nla[], const struct nft_set_desc *desc) { return sizeof(struct nft_rhash); } static void nft_rhash_gc_init(const struct nft_set *set) { struct nft_rhash *priv = nft_set_priv(set); queue_delayed_work(system_power_efficient_wq, &priv->gc_work, nft_set_gc_interval(set)); } static int nft_rhash_init(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const tb[]) { struct nft_rhash *priv = nft_set_priv(set); struct rhashtable_params params = nft_rhash_params; int err; BUILD_BUG_ON(offsetof(struct nft_rhash_elem, priv) != 0); params.nelem_hint = desc->size ?: NFT_RHASH_ELEMENT_HINT; params.key_len = set->klen; err = rhashtable_init(&priv->ht, &params); if (err < 0) return err; INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc); if (set->flags & (NFT_SET_TIMEOUT | NFT_SET_EVAL)) nft_rhash_gc_init(set); return 0; } struct nft_rhash_ctx { const struct nft_ctx ctx; const struct nft_set *set; }; static void nft_rhash_elem_destroy(void *ptr, void *arg) { struct nft_rhash_ctx *rhash_ctx = arg; struct nft_rhash_elem *he = ptr; nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, &he->priv); } static void nft_rhash_destroy(const struct nft_ctx *ctx, const struct nft_set *set) { struct nft_rhash *priv = nft_set_priv(set); struct nft_rhash_ctx rhash_ctx = { .ctx = *ctx, .set = set, }; cancel_delayed_work_sync(&priv->gc_work); rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy, (void *)&rhash_ctx); } /* Number of buckets is stored in u32, so cap our result to 1U<<31 */ #define NFT_MAX_BUCKETS (1U << 31) static u32 nft_hash_buckets(u32 size) { u64 val = div_u64((u64)size * 4, 3); if (val >= NFT_MAX_BUCKETS) return NFT_MAX_BUCKETS; return roundup_pow_of_two(val); } static bool nft_rhash_estimate(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est) { est->size = ~0; est->lookup = NFT_SET_CLASS_O_1; est->space = NFT_SET_CLASS_O_N; return true; } struct nft_hash { u32 seed; u32 buckets; struct hlist_head table[]; }; struct nft_hash_elem { struct nft_elem_priv priv; struct hlist_node node; struct nft_set_ext ext; }; INDIRECT_CALLABLE_SCOPE const struct nft_set_ext * nft_hash_lookup(const struct net *net, const struct nft_set *set, const u32 *key) { struct nft_hash *priv = nft_set_priv(set); u8 genmask = nft_genmask_cur(net); const struct nft_hash_elem *he; u32 hash; hash = jhash(key, set->klen, priv->seed); hash = reciprocal_scale(hash, priv->buckets); hlist_for_each_entry_rcu(he, &priv->table[hash], node) { if (!memcmp(nft_set_ext_key(&he->ext), key, set->klen) && nft_set_elem_active(&he->ext, genmask)) return &he->ext; } return NULL; } static struct nft_elem_priv * 
nft_hash_get(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, unsigned int flags) { struct nft_hash *priv = nft_set_priv(set); u8 genmask = nft_genmask_cur(net); struct nft_hash_elem *he; u32 hash; hash = jhash(elem->key.val.data, set->klen, priv->seed); hash = reciprocal_scale(hash, priv->buckets); hlist_for_each_entry_rcu(he, &priv->table[hash], node) { if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) && nft_set_elem_active(&he->ext, genmask)) return &he->priv; } return ERR_PTR(-ENOENT); } INDIRECT_CALLABLE_SCOPE const struct nft_set_ext * nft_hash_lookup_fast(const struct net *net, const struct nft_set *set, const u32 *key) { struct nft_hash *priv = nft_set_priv(set); u8 genmask = nft_genmask_cur(net); const struct nft_hash_elem *he; u32 hash, k1, k2; k1 = *key; hash = jhash_1word(k1, priv->seed); hash = reciprocal_scale(hash, priv->buckets); hlist_for_each_entry_rcu(he, &priv->table[hash], node) { k2 = *(u32 *)nft_set_ext_key(&he->ext)->data; if (k1 == k2 && nft_set_elem_active(&he->ext, genmask)) return &he->ext; } return NULL; } static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv, const struct nft_set_ext *ext) { const struct nft_data *key = nft_set_ext_key(ext); u32 hash, k1; if (set->klen == 4) { k1 = *(u32 *)key; hash = jhash_1word(k1, priv->seed); } else { hash = jhash(key, set->klen, priv->seed); } hash = reciprocal_scale(hash, priv->buckets); return hash; } static int nft_hash_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_elem_priv **elem_priv) { struct nft_hash_elem *this = nft_elem_priv_cast(elem->priv), *he; struct nft_hash *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); u32 hash; hash = nft_jhash(set, priv, &this->ext); hlist_for_each_entry(he, &priv->table[hash], node) { if (!memcmp(nft_set_ext_key(&this->ext), nft_set_ext_key(&he->ext), set->klen) && nft_set_elem_active(&he->ext, genmask)) { *elem_priv = &he->priv; return -EEXIST; } } hlist_add_head_rcu(&this->node, &priv->table[hash]); return 0; } static void nft_hash_activate(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) { struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv); nft_clear(net, &he->ext); } static void nft_hash_flush(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) { struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv); nft_set_elem_change_active(net, set, &he->ext); } static struct nft_elem_priv * nft_hash_deactivate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash_elem *this = nft_elem_priv_cast(elem->priv), *he; struct nft_hash *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); u32 hash; hash = nft_jhash(set, priv, &this->ext); hlist_for_each_entry(he, &priv->table[hash], node) { if (!memcmp(nft_set_ext_key(&he->ext), &elem->key.val, set->klen) && nft_set_elem_active(&he->ext, genmask)) { nft_set_elem_change_active(net, set, &he->ext); return &he->priv; } } return NULL; } static void nft_hash_remove(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) { struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv); hlist_del_rcu(&he->node); } static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; int i; for (i = 0; i < priv->buckets; i++) { hlist_for_each_entry_rcu(he, 
&priv->table[i], node, lockdep_is_held(&nft_pernet(ctx->net)->commit_mutex)) { if (iter->count < iter->skip) goto cont; iter->err = iter->fn(ctx, set, iter, &he->priv); if (iter->err < 0) return; cont: iter->count++; } } } static u64 nft_hash_privsize(const struct nlattr * const nla[], const struct nft_set_desc *desc) { return sizeof(struct nft_hash) + (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head); } static int nft_hash_init(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const tb[]) { struct nft_hash *priv = nft_set_priv(set); priv->buckets = nft_hash_buckets(desc->size); get_random_bytes(&priv->seed, sizeof(priv->seed)); return 0; } static void nft_hash_destroy(const struct nft_ctx *ctx, const struct nft_set *set) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he; struct hlist_node *next; int i; for (i = 0; i < priv->buckets; i++) { hlist_for_each_entry_safe(he, next, &priv->table[i], node) { hlist_del_rcu(&he->node); nf_tables_set_elem_destroy(ctx, set, &he->priv); } } } static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est) { if (!desc->size) return false; if (desc->klen == 4) return false; est->size = sizeof(struct nft_hash) + (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head) + (u64)desc->size * sizeof(struct nft_hash_elem); est->lookup = NFT_SET_CLASS_O_1; est->space = NFT_SET_CLASS_O_N; return true; } static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features, struct nft_set_estimate *est) { if (!desc->size) return false; if (desc->klen != 4) return false; est->size = sizeof(struct nft_hash) + (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head) + (u64)desc->size * sizeof(struct nft_hash_elem); est->lookup = NFT_SET_CLASS_O_1; est->space = NFT_SET_CLASS_O_N; return true; } const struct nft_set_type nft_set_rhash_type = { .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT | NFT_SET_EVAL, .ops = { .privsize = nft_rhash_privsize, .elemsize = offsetof(struct nft_rhash_elem, ext), .estimate = nft_rhash_estimate, .init = nft_rhash_init, .gc_init = nft_rhash_gc_init, .destroy = nft_rhash_destroy, .insert = nft_rhash_insert, .activate = nft_rhash_activate, .deactivate = nft_rhash_deactivate, .flush = nft_rhash_flush, .remove = nft_rhash_remove, .lookup = nft_rhash_lookup, .update = nft_rhash_update, .delete = nft_rhash_delete, .walk = nft_rhash_walk, .get = nft_rhash_get, }, }; const struct nft_set_type nft_set_hash_type = { .features = NFT_SET_MAP | NFT_SET_OBJECT, .ops = { .privsize = nft_hash_privsize, .elemsize = offsetof(struct nft_hash_elem, ext), .estimate = nft_hash_estimate, .init = nft_hash_init, .destroy = nft_hash_destroy, .insert = nft_hash_insert, .activate = nft_hash_activate, .deactivate = nft_hash_deactivate, .flush = nft_hash_flush, .remove = nft_hash_remove, .lookup = nft_hash_lookup, .walk = nft_hash_walk, .get = nft_hash_get, }, }; const struct nft_set_type nft_set_hash_fast_type = { .features = NFT_SET_MAP | NFT_SET_OBJECT, .ops = { .privsize = nft_hash_privsize, .elemsize = offsetof(struct nft_hash_elem, ext), .estimate = nft_hash_fast_estimate, .init = nft_hash_init, .destroy = nft_hash_destroy, .insert = nft_hash_insert, .activate = nft_hash_activate, .deactivate = nft_hash_deactivate, .flush = nft_hash_flush, .remove = nft_hash_remove, .lookup = nft_hash_lookup_fast, .walk = nft_hash_walk, .get = nft_hash_get, }, };
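nft_hash_destroy() above walks every bucket with hlist_for_each_entry_safe() so that elements can be unlinked and freed without invalidating the iterator. The pattern itself is independent of RCU and of nftables; the short plain-C sketch below shows the same cache-the-successor-before-freeing idea on an ordinary singly linked list, with every name invented for the example.

/* Plain-C sketch of the delete-while-walking pattern; no RCU, all names
 * invented for the example. */
#include <stdio.h>
#include <stdlib.h>

struct node {
        int key;
        struct node *next;
};

static void destroy_bucket(struct node **head)
{
        struct node *cur = *head;

        while (cur) {
                struct node *next = cur->next; /* remember the successor first */

                free(cur); /* now the current node may safely go away */
                cur = next;
        }
        *head = NULL;
}

int main(void)
{
        struct node *head = NULL;

        for (int i = 0; i < 3; i++) {
                struct node *n = malloc(sizeof(*n));

                if (!n)
                        return 1;
                n->key = i;
                n->next = head;
                head = n;
        }
        destroy_bucket(&head);
        printf("bucket destroyed, head=%p\n", (void *)head);
        return 0;
}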
// SPDX-License-Identifier: GPL-2.0-or-later
/* GTP according to GSM TS 09.60 / 3GPP TS 29.060
 *
 * (C) 2012-2014 by sysmocom - s.f.m.c. GmbH
 * (C) 2016 by Pablo Neira Ayuso <pablo@netfilter.org>
 *
 * Author: Harald Welte <hwelte@sysmocom.de>
 *         Pablo Neira Ayuso <pablo@netfilter.org>
 *         Andreas Schultz <aschultz@travelping.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/udp.h>
#include <linux/rculist.h>
#include <linux/jhash.h>
#include <linux/if_tunnel.h>
#include <linux/net.h>
#include <linux/file.h>
#include <linux/gtp.h>

#include <net/flow.h>
#include <net/inet_dscp.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/inet_sock.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/udp.h>
#include <net/udp_tunnel.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/genetlink.h>
#include <net/netns/generic.h>
#include <net/gtp.h>

/* An active session for the subscriber. */
struct pdp_ctx {
        struct hlist_node hlist_tid;
        struct hlist_node hlist_addr;

        union {
                struct {
                        u64 tid;
                        u16 flow;
                } v0;
                struct {
                        u32 i_tei;
                        u32 o_tei;
                } v1;
        } u;
        u8 gtp_version;
        u16 af;

        union {
                struct in_addr addr;
                struct in6_addr addr6;
        } ms;
        union {
                struct in_addr addr;
                struct in6_addr addr6;
        } peer;

        struct sock *sk;
        struct net_device *dev;

        atomic_t tx_seq;

        struct rcu_head rcu_head;
};

/* One instance of the GTP device.
*/ struct gtp_dev { struct list_head list; struct sock *sk0; struct sock *sk1u; u8 sk_created; struct net_device *dev; struct net *net; unsigned int role; unsigned int hash_size; struct hlist_head *tid_hash; struct hlist_head *addr_hash; u8 restart_count; }; struct echo_info { u16 af; u8 gtp_version; union { struct in_addr addr; } ms; union { struct in_addr addr; } peer; }; static unsigned int gtp_net_id __read_mostly; struct gtp_net { struct list_head gtp_dev_list; }; static u32 gtp_h_initval; static struct genl_family gtp_genl_family; enum gtp_multicast_groups { GTP_GENL_MCGRP, }; static const struct genl_multicast_group gtp_genl_mcgrps[] = { [GTP_GENL_MCGRP] = { .name = GTP_GENL_MCGRP_NAME }, }; static void pdp_context_delete(struct pdp_ctx *pctx); static inline u32 gtp0_hashfn(u64 tid) { u32 *tid32 = (u32 *) &tid; return jhash_2words(tid32[0], tid32[1], gtp_h_initval); } static inline u32 gtp1u_hashfn(u32 tid) { return jhash_1word(tid, gtp_h_initval); } static inline u32 ipv4_hashfn(__be32 ip) { return jhash_1word((__force u32)ip, gtp_h_initval); } static u32 ipv6_hashfn(const struct in6_addr *ip6) { return jhash_2words((__force u32)ip6->s6_addr32[0], (__force u32)ip6->s6_addr32[1], gtp_h_initval); } /* Resolve a PDP context structure based on the 64bit TID. */ static struct pdp_ctx *gtp0_pdp_find(struct gtp_dev *gtp, u64 tid, u16 family) { struct hlist_head *head; struct pdp_ctx *pdp; head = &gtp->tid_hash[gtp0_hashfn(tid) % gtp->hash_size]; hlist_for_each_entry_rcu(pdp, head, hlist_tid) { if (pdp->af == family && pdp->gtp_version == GTP_V0 && pdp->u.v0.tid == tid) return pdp; } return NULL; } /* Resolve a PDP context structure based on the 32bit TEI. */ static struct pdp_ctx *gtp1_pdp_find(struct gtp_dev *gtp, u32 tid, u16 family) { struct hlist_head *head; struct pdp_ctx *pdp; head = &gtp->tid_hash[gtp1u_hashfn(tid) % gtp->hash_size]; hlist_for_each_entry_rcu(pdp, head, hlist_tid) { if (pdp->af == family && pdp->gtp_version == GTP_V1 && pdp->u.v1.i_tei == tid) return pdp; } return NULL; } /* Resolve a PDP context based on IPv4 address of MS. */ static struct pdp_ctx *ipv4_pdp_find(struct gtp_dev *gtp, __be32 ms_addr) { struct hlist_head *head; struct pdp_ctx *pdp; head = &gtp->addr_hash[ipv4_hashfn(ms_addr) % gtp->hash_size]; hlist_for_each_entry_rcu(pdp, head, hlist_addr) { if (pdp->af == AF_INET && pdp->ms.addr.s_addr == ms_addr) return pdp; } return NULL; } /* 3GPP TS 29.060: PDN Connection: the association between a MS represented by * [...] one IPv6 *prefix* and a PDN represented by an APN. * * Then, 3GPP TS 29.061, Section 11.2.1.3 says: The size of the prefix shall be * according to the maximum prefix length for a global IPv6 address as * specified in the IPv6 Addressing Architecture, see RFC 4291. * * Finally, RFC 4291 section 2.5.4 states: All Global Unicast addresses other * than those that start with binary 000 have a 64-bit interface ID field * (i.e., n + m = 64). 
*/ static bool ipv6_pdp_addr_equal(const struct in6_addr *a, const struct in6_addr *b) { return a->s6_addr32[0] == b->s6_addr32[0] && a->s6_addr32[1] == b->s6_addr32[1]; } static struct pdp_ctx *ipv6_pdp_find(struct gtp_dev *gtp, const struct in6_addr *ms_addr) { struct hlist_head *head; struct pdp_ctx *pdp; head = &gtp->addr_hash[ipv6_hashfn(ms_addr) % gtp->hash_size]; hlist_for_each_entry_rcu(pdp, head, hlist_addr) { if (pdp->af == AF_INET6 && ipv6_pdp_addr_equal(&pdp->ms.addr6, ms_addr)) return pdp; } return NULL; } static bool gtp_check_ms_ipv4(struct sk_buff *skb, struct pdp_ctx *pctx, unsigned int hdrlen, unsigned int role) { struct iphdr *iph; if (!pskb_may_pull(skb, hdrlen + sizeof(struct iphdr))) return false; iph = (struct iphdr *)(skb->data + hdrlen); if (role == GTP_ROLE_SGSN) return iph->daddr == pctx->ms.addr.s_addr; else return iph->saddr == pctx->ms.addr.s_addr; } static bool gtp_check_ms_ipv6(struct sk_buff *skb, struct pdp_ctx *pctx, unsigned int hdrlen, unsigned int role) { struct ipv6hdr *ip6h; int ret; if (!pskb_may_pull(skb, hdrlen + sizeof(struct ipv6hdr))) return false; ip6h = (struct ipv6hdr *)(skb->data + hdrlen); if ((ipv6_addr_type(&ip6h->saddr) & IPV6_ADDR_LINKLOCAL) || (ipv6_addr_type(&ip6h->daddr) & IPV6_ADDR_LINKLOCAL)) return false; if (role == GTP_ROLE_SGSN) { ret = ipv6_pdp_addr_equal(&ip6h->daddr, &pctx->ms.addr6); } else { ret = ipv6_pdp_addr_equal(&ip6h->saddr, &pctx->ms.addr6); } return ret; } /* Check if the inner IP address in this packet is assigned to any * existing mobile subscriber. */ static bool gtp_check_ms(struct sk_buff *skb, struct pdp_ctx *pctx, unsigned int hdrlen, unsigned int role, __u16 inner_proto) { switch (inner_proto) { case ETH_P_IP: return gtp_check_ms_ipv4(skb, pctx, hdrlen, role); case ETH_P_IPV6: return gtp_check_ms_ipv6(skb, pctx, hdrlen, role); } return false; } static int gtp_inner_proto(struct sk_buff *skb, unsigned int hdrlen, __u16 *inner_proto) { __u8 *ip_version, _ip_version; ip_version = skb_header_pointer(skb, hdrlen, sizeof(*ip_version), &_ip_version); if (!ip_version) return -1; switch (*ip_version & 0xf0) { case 0x40: *inner_proto = ETH_P_IP; break; case 0x60: *inner_proto = ETH_P_IPV6; break; default: return -1; } return 0; } static int gtp_rx(struct pdp_ctx *pctx, struct sk_buff *skb, unsigned int hdrlen, unsigned int role, __u16 inner_proto) { if (!gtp_check_ms(skb, pctx, hdrlen, role, inner_proto)) { netdev_dbg(pctx->dev, "No PDP ctx for this MS\n"); return 1; } /* Get rid of the GTP + UDP headers. */ if (iptunnel_pull_header(skb, hdrlen, htons(inner_proto), !net_eq(sock_net(pctx->sk), dev_net(pctx->dev)))) { pctx->dev->stats.rx_length_errors++; goto err; } netdev_dbg(pctx->dev, "forwarding packet from GGSN to uplink\n"); /* Now that the UDP and the GTP header have been removed, set up the * new network header. This is required by the upper layer to * calculate the transport header. 
*/ skb_reset_network_header(skb); skb_reset_mac_header(skb); skb->dev = pctx->dev; dev_sw_netstats_rx_add(pctx->dev, skb->len); __netif_rx(skb); return 0; err: pctx->dev->stats.rx_dropped++; return -1; } static struct rtable *ip4_route_output_gtp(struct flowi4 *fl4, const struct sock *sk, __be32 daddr, __be32 saddr) { memset(fl4, 0, sizeof(*fl4)); fl4->flowi4_oif = sk->sk_bound_dev_if; fl4->daddr = daddr; fl4->saddr = saddr; fl4->flowi4_dscp = inet_sk_dscp(inet_sk(sk)); fl4->flowi4_scope = ip_sock_rt_scope(sk); fl4->flowi4_proto = sk->sk_protocol; return ip_route_output_key(sock_net(sk), fl4); } static struct rt6_info *ip6_route_output_gtp(struct net *net, struct flowi6 *fl6, const struct sock *sk, const struct in6_addr *daddr, struct in6_addr *saddr) { struct dst_entry *dst; memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_oif = sk->sk_bound_dev_if; fl6->daddr = *daddr; fl6->saddr = *saddr; fl6->flowi6_proto = sk->sk_protocol; dst = ipv6_stub->ipv6_dst_lookup_flow(net, sk, fl6, NULL); if (IS_ERR(dst)) return ERR_PTR(-ENETUNREACH); return (struct rt6_info *)dst; } /* GSM TS 09.60. 7.3 * In all Path Management messages: * - TID: is not used and shall be set to 0. * - Flow Label is not used and shall be set to 0 * In signalling messages: * - number: this field is not yet used in signalling messages. * It shall be set to 255 by the sender and shall be ignored * by the receiver * Returns true if the echo req was correct, false otherwise. */ static bool gtp0_validate_echo_hdr(struct gtp0_header *gtp0) { return !(gtp0->tid || (gtp0->flags ^ 0x1e) || gtp0->number != 0xff || gtp0->flow); } /* msg_type has to be GTP_ECHO_REQ or GTP_ECHO_RSP */ static void gtp0_build_echo_msg(struct gtp0_header *hdr, __u8 msg_type) { int len_pkt, len_hdr; hdr->flags = 0x1e; /* v0, GTP-non-prime. */ hdr->type = msg_type; /* GSM TS 09.60. 7.3 In all Path Management Flow Label and TID * are not used and shall be set to 0. */ hdr->flow = 0; hdr->tid = 0; hdr->number = 0xff; hdr->spare[0] = 0xff; hdr->spare[1] = 0xff; hdr->spare[2] = 0xff; len_pkt = sizeof(struct gtp0_packet); len_hdr = sizeof(struct gtp0_header); if (msg_type == GTP_ECHO_RSP) hdr->length = htons(len_pkt - len_hdr); else hdr->length = 0; } static int gtp0_send_echo_resp_ip(struct gtp_dev *gtp, struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); struct flowi4 fl4; struct rtable *rt; /* find route to the sender, * src address becomes dst address and vice versa. */ rt = ip4_route_output_gtp(&fl4, gtp->sk0, iph->saddr, iph->daddr); if (IS_ERR(rt)) { netdev_dbg(gtp->dev, "no route for echo response from %pI4\n", &iph->saddr); return -1; } udp_tunnel_xmit_skb(rt, gtp->sk0, skb, fl4.saddr, fl4.daddr, iph->tos, ip4_dst_hoplimit(&rt->dst), 0, htons(GTP0_PORT), htons(GTP0_PORT), !net_eq(sock_net(gtp->sk1u), dev_net(gtp->dev)), false, 0); return 0; } static int gtp0_send_echo_resp(struct gtp_dev *gtp, struct sk_buff *skb) { struct gtp0_packet *gtp_pkt; struct gtp0_header *gtp0; __be16 seq; gtp0 = (struct gtp0_header *)(skb->data + sizeof(struct udphdr)); if (!gtp0_validate_echo_hdr(gtp0)) return -1; seq = gtp0->seq; /* pull GTP and UDP headers */ skb_pull_data(skb, sizeof(struct gtp0_header) + sizeof(struct udphdr)); gtp_pkt = skb_push(skb, sizeof(struct gtp0_packet)); memset(gtp_pkt, 0, sizeof(struct gtp0_packet)); gtp0_build_echo_msg(&gtp_pkt->gtp0_h, GTP_ECHO_RSP); /* GSM TS 09.60. 7.3 The Sequence Number in a signalling response * message shall be copied from the signalling request message * that the GSN is replying to. 
*/ gtp_pkt->gtp0_h.seq = seq; gtp_pkt->ie.tag = GTPIE_RECOVERY; gtp_pkt->ie.val = gtp->restart_count; switch (gtp->sk0->sk_family) { case AF_INET: if (gtp0_send_echo_resp_ip(gtp, skb) < 0) return -1; break; case AF_INET6: return -1; } return 0; } static int gtp_genl_fill_echo(struct sk_buff *skb, u32 snd_portid, u32 snd_seq, int flags, u32 type, struct echo_info echo) { void *genlh; genlh = genlmsg_put(skb, snd_portid, snd_seq, &gtp_genl_family, flags, type); if (!genlh) goto failure; if (nla_put_u32(skb, GTPA_VERSION, echo.gtp_version) || nla_put_be32(skb, GTPA_PEER_ADDRESS, echo.peer.addr.s_addr) || nla_put_be32(skb, GTPA_MS_ADDRESS, echo.ms.addr.s_addr)) goto failure; genlmsg_end(skb, genlh); return 0; failure: genlmsg_cancel(skb, genlh); return -EMSGSIZE; } static void gtp0_handle_echo_resp_ip(struct sk_buff *skb, struct echo_info *echo) { struct iphdr *iph = ip_hdr(skb); echo->ms.addr.s_addr = iph->daddr; echo->peer.addr.s_addr = iph->saddr; echo->gtp_version = GTP_V0; } static int gtp0_handle_echo_resp(struct gtp_dev *gtp, struct sk_buff *skb) { struct gtp0_header *gtp0; struct echo_info echo; struct sk_buff *msg; int ret; gtp0 = (struct gtp0_header *)(skb->data + sizeof(struct udphdr)); if (!gtp0_validate_echo_hdr(gtp0)) return -1; switch (gtp->sk0->sk_family) { case AF_INET: gtp0_handle_echo_resp_ip(skb, &echo); break; case AF_INET6: return -1; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; ret = gtp_genl_fill_echo(msg, 0, 0, 0, GTP_CMD_ECHOREQ, echo); if (ret < 0) { nlmsg_free(msg); return ret; } return genlmsg_multicast_netns(&gtp_genl_family, dev_net(gtp->dev), msg, 0, GTP_GENL_MCGRP, GFP_ATOMIC); } static int gtp_proto_to_family(__u16 proto) { switch (proto) { case ETH_P_IP: return AF_INET; case ETH_P_IPV6: return AF_INET6; default: WARN_ON_ONCE(1); break; } return AF_UNSPEC; } /* 1 means pass up to the stack, -1 means drop and 0 means decapsulated. */ static int gtp0_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb) { unsigned int hdrlen = sizeof(struct udphdr) + sizeof(struct gtp0_header); struct gtp0_header *gtp0; struct pdp_ctx *pctx; __u16 inner_proto; if (!pskb_may_pull(skb, hdrlen)) return -1; gtp0 = (struct gtp0_header *)(skb->data + sizeof(struct udphdr)); if ((gtp0->flags >> 5) != GTP_V0) return 1; /* If the sockets were created in kernel, it means that * there is no daemon running in userspace which would * handle echo request. */ if (gtp0->type == GTP_ECHO_REQ && gtp->sk_created) return gtp0_send_echo_resp(gtp, skb); if (gtp0->type == GTP_ECHO_RSP && gtp->sk_created) return gtp0_handle_echo_resp(gtp, skb); if (gtp0->type != GTP_TPDU) return 1; if (gtp_inner_proto(skb, hdrlen, &inner_proto) < 0) { netdev_dbg(gtp->dev, "GTP packet does not encapsulate an IP packet\n"); return -1; } pctx = gtp0_pdp_find(gtp, be64_to_cpu(gtp0->tid), gtp_proto_to_family(inner_proto)); if (!pctx) { netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb); return 1; } return gtp_rx(pctx, skb, hdrlen, gtp->role, inner_proto); } /* msg_type has to be GTP_ECHO_REQ or GTP_ECHO_RSP */ static void gtp1u_build_echo_msg(struct gtp1_header_long *hdr, __u8 msg_type) { int len_pkt, len_hdr; /* S flag must be set to 1 */ hdr->flags = 0x32; /* v1, GTP-non-prime. */ hdr->type = msg_type; /* 3GPP TS 29.281 5.1 - TEID has to be set to 0 */ hdr->tid = 0; /* seq, npdu and next should be counted to the length of the GTP packet * that's why size of gtp1_header should be subtracted, * not size of gtp1_header_long. 
*/ len_hdr = sizeof(struct gtp1_header); if (msg_type == GTP_ECHO_RSP) { len_pkt = sizeof(struct gtp1u_packet); hdr->length = htons(len_pkt - len_hdr); } else { /* GTP_ECHO_REQ does not carry GTP Information Element, * the why gtp1_header_long is used here. */ len_pkt = sizeof(struct gtp1_header_long); hdr->length = htons(len_pkt - len_hdr); } } static int gtp1u_send_echo_resp(struct gtp_dev *gtp, struct sk_buff *skb) { struct gtp1_header_long *gtp1u; struct gtp1u_packet *gtp_pkt; struct rtable *rt; struct flowi4 fl4; struct iphdr *iph; gtp1u = (struct gtp1_header_long *)(skb->data + sizeof(struct udphdr)); /* 3GPP TS 29.281 5.1 - For the Echo Request, Echo Response, * Error Indication and Supported Extension Headers Notification * messages, the S flag shall be set to 1 and TEID shall be set to 0. */ if (!(gtp1u->flags & GTP1_F_SEQ) || gtp1u->tid) return -1; /* pull GTP and UDP headers */ skb_pull_data(skb, sizeof(struct gtp1_header_long) + sizeof(struct udphdr)); gtp_pkt = skb_push(skb, sizeof(struct gtp1u_packet)); memset(gtp_pkt, 0, sizeof(struct gtp1u_packet)); gtp1u_build_echo_msg(&gtp_pkt->gtp1u_h, GTP_ECHO_RSP); /* 3GPP TS 29.281 7.7.2 - The Restart Counter value in the * Recovery information element shall not be used, i.e. it shall * be set to zero by the sender and shall be ignored by the receiver. * The Recovery information element is mandatory due to backwards * compatibility reasons. */ gtp_pkt->ie.tag = GTPIE_RECOVERY; gtp_pkt->ie.val = 0; iph = ip_hdr(skb); /* find route to the sender, * src address becomes dst address and vice versa. */ rt = ip4_route_output_gtp(&fl4, gtp->sk1u, iph->saddr, iph->daddr); if (IS_ERR(rt)) { netdev_dbg(gtp->dev, "no route for echo response from %pI4\n", &iph->saddr); return -1; } udp_tunnel_xmit_skb(rt, gtp->sk1u, skb, fl4.saddr, fl4.daddr, iph->tos, ip4_dst_hoplimit(&rt->dst), 0, htons(GTP1U_PORT), htons(GTP1U_PORT), !net_eq(sock_net(gtp->sk1u), dev_net(gtp->dev)), false, 0); return 0; } static int gtp1u_handle_echo_resp(struct gtp_dev *gtp, struct sk_buff *skb) { struct gtp1_header_long *gtp1u; struct echo_info echo; struct sk_buff *msg; struct iphdr *iph; int ret; gtp1u = (struct gtp1_header_long *)(skb->data + sizeof(struct udphdr)); /* 3GPP TS 29.281 5.1 - For the Echo Request, Echo Response, * Error Indication and Supported Extension Headers Notification * messages, the S flag shall be set to 1 and TEID shall be set to 0. */ if (!(gtp1u->flags & GTP1_F_SEQ) || gtp1u->tid) return -1; iph = ip_hdr(skb); echo.ms.addr.s_addr = iph->daddr; echo.peer.addr.s_addr = iph->saddr; echo.gtp_version = GTP_V1; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) return -ENOMEM; ret = gtp_genl_fill_echo(msg, 0, 0, 0, GTP_CMD_ECHOREQ, echo); if (ret < 0) { nlmsg_free(msg); return ret; } return genlmsg_multicast_netns(&gtp_genl_family, dev_net(gtp->dev), msg, 0, GTP_GENL_MCGRP, GFP_ATOMIC); } static int gtp_parse_exthdrs(struct sk_buff *skb, unsigned int *hdrlen) { struct gtp_ext_hdr *gtp_exthdr, _gtp_exthdr; unsigned int offset = *hdrlen; __u8 *next_type, _next_type; /* From 29.060: "The Extension Header Length field specifies the length * of the particular Extension header in 4 octets units." * * This length field includes length field size itself (1 byte), * payload (variable length) and next type (1 byte). The extension * header is aligned to to 4 bytes. 
*/ do { gtp_exthdr = skb_header_pointer(skb, offset, sizeof(*gtp_exthdr), &_gtp_exthdr); if (!gtp_exthdr || !gtp_exthdr->len) return -1; offset += gtp_exthdr->len * 4; /* From 29.060: "If no such Header follows, then the value of * the Next Extension Header Type shall be 0." */ next_type = skb_header_pointer(skb, offset - 1, sizeof(_next_type), &_next_type); if (!next_type) return -1; } while (*next_type != 0); *hdrlen = offset; return 0; } static int gtp1u_udp_encap_recv(struct gtp_dev *gtp, struct sk_buff *skb) { unsigned int hdrlen = sizeof(struct udphdr) + sizeof(struct gtp1_header); struct gtp1_header *gtp1; struct pdp_ctx *pctx; __u16 inner_proto; if (!pskb_may_pull(skb, hdrlen)) return -1; gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr)); if ((gtp1->flags >> 5) != GTP_V1) return 1; /* If the sockets were created in kernel, it means that * there is no daemon running in userspace which would * handle echo request. */ if (gtp1->type == GTP_ECHO_REQ && gtp->sk_created) return gtp1u_send_echo_resp(gtp, skb); if (gtp1->type == GTP_ECHO_RSP && gtp->sk_created) return gtp1u_handle_echo_resp(gtp, skb); if (gtp1->type != GTP_TPDU) return 1; /* From 29.060: "This field shall be present if and only if any one or * more of the S, PN and E flags are set.". * * If any of the bit is set, then the remaining ones also have to be * set. */ if (gtp1->flags & GTP1_F_MASK) hdrlen += 4; /* Make sure the header is larger enough, including extensions. */ if (!pskb_may_pull(skb, hdrlen)) return -1; if (gtp_inner_proto(skb, hdrlen, &inner_proto) < 0) { netdev_dbg(gtp->dev, "GTP packet does not encapsulate an IP packet\n"); return -1; } gtp1 = (struct gtp1_header *)(skb->data + sizeof(struct udphdr)); pctx = gtp1_pdp_find(gtp, ntohl(gtp1->tid), gtp_proto_to_family(inner_proto)); if (!pctx) { netdev_dbg(gtp->dev, "No PDP ctx to decap skb=%p\n", skb); return 1; } if (gtp1->flags & GTP1_F_EXTHDR && gtp_parse_exthdrs(skb, &hdrlen) < 0) return -1; return gtp_rx(pctx, skb, hdrlen, gtp->role, inner_proto); } static void __gtp_encap_destroy(struct sock *sk) { struct gtp_dev *gtp; lock_sock(sk); gtp = sk->sk_user_data; if (gtp) { if (gtp->sk0 == sk) gtp->sk0 = NULL; else gtp->sk1u = NULL; WRITE_ONCE(udp_sk(sk)->encap_type, 0); rcu_assign_sk_user_data(sk, NULL); release_sock(sk); sock_put(sk); return; } release_sock(sk); } static void gtp_encap_destroy(struct sock *sk) { rtnl_lock(); __gtp_encap_destroy(sk); rtnl_unlock(); } static void gtp_encap_disable_sock(struct sock *sk) { if (!sk) return; __gtp_encap_destroy(sk); } static void gtp_encap_disable(struct gtp_dev *gtp) { if (gtp->sk_created) { udp_tunnel_sock_release(gtp->sk0->sk_socket); udp_tunnel_sock_release(gtp->sk1u->sk_socket); gtp->sk_created = false; gtp->sk0 = NULL; gtp->sk1u = NULL; } else { gtp_encap_disable_sock(gtp->sk0); gtp_encap_disable_sock(gtp->sk1u); } } /* UDP encapsulation receive handler. See net/ipv4/udp.c. * Return codes: 0: success, <0: error, >0: pass up to userspace UDP socket. */ static int gtp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct gtp_dev *gtp; int ret = 0; gtp = rcu_dereference_sk_user_data(sk); if (!gtp) return 1; netdev_dbg(gtp->dev, "encap_recv sk=%p\n", sk); switch (READ_ONCE(udp_sk(sk)->encap_type)) { case UDP_ENCAP_GTP0: netdev_dbg(gtp->dev, "received GTP0 packet\n"); ret = gtp0_udp_encap_recv(gtp, skb); break; case UDP_ENCAP_GTP1U: netdev_dbg(gtp->dev, "received GTP1U packet\n"); ret = gtp1u_udp_encap_recv(gtp, skb); break; default: ret = -1; /* Shouldn't happen. 
*/ } switch (ret) { case 1: netdev_dbg(gtp->dev, "pass up to the process\n"); break; case 0: break; case -1: netdev_dbg(gtp->dev, "GTP packet has been dropped\n"); kfree_skb(skb); ret = 0; break; } return ret; } static void gtp_dev_uninit(struct net_device *dev) { struct gtp_dev *gtp = netdev_priv(dev); gtp_encap_disable(gtp); } static inline void gtp0_push_header(struct sk_buff *skb, struct pdp_ctx *pctx) { int payload_len = skb->len; struct gtp0_header *gtp0; gtp0 = skb_push(skb, sizeof(*gtp0)); gtp0->flags = 0x1e; /* v0, GTP-non-prime. */ gtp0->type = GTP_TPDU; gtp0->length = htons(payload_len); gtp0->seq = htons((atomic_inc_return(&pctx->tx_seq) - 1) % 0xffff); gtp0->flow = htons(pctx->u.v0.flow); gtp0->number = 0xff; gtp0->spare[0] = gtp0->spare[1] = gtp0->spare[2] = 0xff; gtp0->tid = cpu_to_be64(pctx->u.v0.tid); } static inline void gtp1_push_header(struct sk_buff *skb, struct pdp_ctx *pctx) { int payload_len = skb->len; struct gtp1_header *gtp1; gtp1 = skb_push(skb, sizeof(*gtp1)); /* Bits 8 7 6 5 4 3 2 1 * +--+--+--+--+--+--+--+--+ * |version |PT| 0| E| S|PN| * +--+--+--+--+--+--+--+--+ * 0 0 1 1 1 0 0 0 */ gtp1->flags = 0x30; /* v1, GTP-non-prime. */ gtp1->type = GTP_TPDU; gtp1->length = htons(payload_len); gtp1->tid = htonl(pctx->u.v1.o_tei); /* TODO: Support for extension header, sequence number and N-PDU. * Update the length field if any of them is available. */ } struct gtp_pktinfo { struct sock *sk; union { struct flowi4 fl4; struct flowi6 fl6; }; union { struct rtable *rt; struct rt6_info *rt6; }; struct pdp_ctx *pctx; struct net_device *dev; __u8 tos; __be16 gtph_port; }; static void gtp_push_header(struct sk_buff *skb, struct gtp_pktinfo *pktinfo) { switch (pktinfo->pctx->gtp_version) { case GTP_V0: pktinfo->gtph_port = htons(GTP0_PORT); gtp0_push_header(skb, pktinfo->pctx); break; case GTP_V1: pktinfo->gtph_port = htons(GTP1U_PORT); gtp1_push_header(skb, pktinfo->pctx); break; } } static inline void gtp_set_pktinfo_ipv4(struct gtp_pktinfo *pktinfo, struct sock *sk, __u8 tos, struct pdp_ctx *pctx, struct rtable *rt, struct flowi4 *fl4, struct net_device *dev) { pktinfo->sk = sk; pktinfo->tos = tos; pktinfo->pctx = pctx; pktinfo->rt = rt; pktinfo->fl4 = *fl4; pktinfo->dev = dev; } static void gtp_set_pktinfo_ipv6(struct gtp_pktinfo *pktinfo, struct sock *sk, __u8 tos, struct pdp_ctx *pctx, struct rt6_info *rt6, struct flowi6 *fl6, struct net_device *dev) { pktinfo->sk = sk; pktinfo->tos = tos; pktinfo->pctx = pctx; pktinfo->rt6 = rt6; pktinfo->fl6 = *fl6; pktinfo->dev = dev; } static int gtp_build_skb_outer_ip4(struct sk_buff *skb, struct net_device *dev, struct gtp_pktinfo *pktinfo, struct pdp_ctx *pctx, __u8 tos, __be16 frag_off) { struct rtable *rt; struct flowi4 fl4; __be16 df; int mtu; rt = ip4_route_output_gtp(&fl4, pctx->sk, pctx->peer.addr.s_addr, inet_sk(pctx->sk)->inet_saddr); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to SSGN %pI4\n", &pctx->peer.addr.s_addr); dev->stats.tx_carrier_errors++; goto err; } if (rt->dst.dev == dev) { netdev_dbg(dev, "circular route to SSGN %pI4\n", &pctx->peer.addr.s_addr); dev->stats.collisions++; goto err_rt; } /* This is similar to tnl_update_pmtu(). 
*/ df = frag_off; if (df) { mtu = dst_mtu(&rt->dst) - dev->hard_header_len - sizeof(struct iphdr) - sizeof(struct udphdr); switch (pctx->gtp_version) { case GTP_V0: mtu -= sizeof(struct gtp0_header); break; case GTP_V1: mtu -= sizeof(struct gtp1_header); break; } } else { mtu = dst_mtu(&rt->dst); } skb_dst_update_pmtu_no_confirm(skb, mtu); if (frag_off & htons(IP_DF) && ((!skb_is_gso(skb) && skb->len > mtu) || (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu)))) { netdev_dbg(dev, "packet too big, fragmentation needed\n"); icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); goto err_rt; } gtp_set_pktinfo_ipv4(pktinfo, pctx->sk, tos, pctx, rt, &fl4, dev); gtp_push_header(skb, pktinfo); return 0; err_rt: ip_rt_put(rt); err: return -EBADMSG; } static int gtp_build_skb_outer_ip6(struct net *net, struct sk_buff *skb, struct net_device *dev, struct gtp_pktinfo *pktinfo, struct pdp_ctx *pctx, __u8 tos) { struct dst_entry *dst; struct rt6_info *rt; struct flowi6 fl6; int mtu; rt = ip6_route_output_gtp(net, &fl6, pctx->sk, &pctx->peer.addr6, &inet6_sk(pctx->sk)->saddr); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to SSGN %pI6\n", &pctx->peer.addr6); dev->stats.tx_carrier_errors++; goto err; } dst = &rt->dst; if (rt->dst.dev == dev) { netdev_dbg(dev, "circular route to SSGN %pI6\n", &pctx->peer.addr6); dev->stats.collisions++; goto err_rt; } mtu = dst_mtu(&rt->dst) - dev->hard_header_len - sizeof(struct ipv6hdr) - sizeof(struct udphdr); switch (pctx->gtp_version) { case GTP_V0: mtu -= sizeof(struct gtp0_header); break; case GTP_V1: mtu -= sizeof(struct gtp1_header); break; } skb_dst_update_pmtu_no_confirm(skb, mtu); if ((!skb_is_gso(skb) && skb->len > mtu) || (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))) { netdev_dbg(dev, "packet too big, fragmentation needed\n"); icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); goto err_rt; } gtp_set_pktinfo_ipv6(pktinfo, pctx->sk, tos, pctx, rt, &fl6, dev); gtp_push_header(skb, pktinfo); return 0; err_rt: dst_release(dst); err: return -EBADMSG; } static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, struct gtp_pktinfo *pktinfo) { struct gtp_dev *gtp = netdev_priv(dev); struct net *net = gtp->net; struct pdp_ctx *pctx; struct iphdr *iph; int ret; /* Read the IP destination address and resolve the PDP context. * Prepend PDP header with TEI/TID from PDP ctx. */ iph = ip_hdr(skb); if (gtp->role == GTP_ROLE_SGSN) pctx = ipv4_pdp_find(gtp, iph->saddr); else pctx = ipv4_pdp_find(gtp, iph->daddr); if (!pctx) { netdev_dbg(dev, "no PDP ctx found for %pI4, skip\n", &iph->daddr); return -ENOENT; } netdev_dbg(dev, "found PDP context %p\n", pctx); switch (pctx->sk->sk_family) { case AF_INET: ret = gtp_build_skb_outer_ip4(skb, dev, pktinfo, pctx, iph->tos, iph->frag_off); break; case AF_INET6: ret = gtp_build_skb_outer_ip6(net, skb, dev, pktinfo, pctx, iph->tos); break; default: ret = -1; WARN_ON_ONCE(1); break; } if (ret < 0) return ret; netdev_dbg(dev, "gtp -> IP src: %pI4 dst: %pI4\n", &iph->saddr, &iph->daddr); return 0; } static int gtp_build_skb_ip6(struct sk_buff *skb, struct net_device *dev, struct gtp_pktinfo *pktinfo) { struct gtp_dev *gtp = netdev_priv(dev); struct net *net = gtp->net; struct pdp_ctx *pctx; struct ipv6hdr *ip6h; __u8 tos; int ret; /* Read the IP destination address and resolve the PDP context. * Prepend PDP header with TEI/TID from PDP ctx. 
*/ ip6h = ipv6_hdr(skb); if (gtp->role == GTP_ROLE_SGSN) pctx = ipv6_pdp_find(gtp, &ip6h->saddr); else pctx = ipv6_pdp_find(gtp, &ip6h->daddr); if (!pctx) { netdev_dbg(dev, "no PDP ctx found for %pI6, skip\n", &ip6h->daddr); return -ENOENT; } netdev_dbg(dev, "found PDP context %p\n", pctx); tos = ipv6_get_dsfield(ip6h); switch (pctx->sk->sk_family) { case AF_INET: ret = gtp_build_skb_outer_ip4(skb, dev, pktinfo, pctx, tos, 0); break; case AF_INET6: ret = gtp_build_skb_outer_ip6(net, skb, dev, pktinfo, pctx, tos); break; default: ret = -1; WARN_ON_ONCE(1); break; } if (ret < 0) return ret; netdev_dbg(dev, "gtp -> IP src: %pI6 dst: %pI6\n", &ip6h->saddr, &ip6h->daddr); return 0; } static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) { unsigned int proto = ntohs(skb->protocol); struct gtp_pktinfo pktinfo; int err; /* Ensure there is sufficient headroom. */ if (skb_cow_head(skb, dev->needed_headroom)) goto tx_err; if (!pskb_inet_may_pull(skb)) goto tx_err; skb_reset_inner_headers(skb); /* PDP context lookups in gtp_build_skb_*() need rcu read-side lock. */ rcu_read_lock(); switch (proto) { case ETH_P_IP: err = gtp_build_skb_ip4(skb, dev, &pktinfo); break; case ETH_P_IPV6: err = gtp_build_skb_ip6(skb, dev, &pktinfo); break; default: err = -EOPNOTSUPP; break; } rcu_read_unlock(); if (err < 0) goto tx_err; switch (pktinfo.pctx->sk->sk_family) { case AF_INET: udp_tunnel_xmit_skb(pktinfo.rt, pktinfo.sk, skb, pktinfo.fl4.saddr, pktinfo.fl4.daddr, pktinfo.tos, ip4_dst_hoplimit(&pktinfo.rt->dst), 0, pktinfo.gtph_port, pktinfo.gtph_port, !net_eq(sock_net(pktinfo.pctx->sk), dev_net(dev)), false, 0); break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) udp_tunnel6_xmit_skb(&pktinfo.rt6->dst, pktinfo.sk, skb, dev, &pktinfo.fl6.saddr, &pktinfo.fl6.daddr, pktinfo.tos, ip6_dst_hoplimit(&pktinfo.rt->dst), 0, pktinfo.gtph_port, pktinfo.gtph_port, false, 0); #else goto tx_err; #endif break; } return NETDEV_TX_OK; tx_err: dev->stats.tx_errors++; dev_kfree_skb(skb); return NETDEV_TX_OK; } static const struct net_device_ops gtp_netdev_ops = { .ndo_uninit = gtp_dev_uninit, .ndo_start_xmit = gtp_dev_xmit, }; static const struct device_type gtp_type = { .name = "gtp", }; #define GTP_TH_MAXLEN (sizeof(struct udphdr) + sizeof(struct gtp0_header)) #define GTP_IPV4_MAXLEN (sizeof(struct iphdr) + GTP_TH_MAXLEN) static void gtp_link_setup(struct net_device *dev) { struct gtp_dev *gtp = netdev_priv(dev); dev->netdev_ops = &gtp_netdev_ops; dev->needs_free_netdev = true; SET_NETDEV_DEVTYPE(dev, &gtp_type); dev->hard_header_len = 0; dev->addr_len = 0; dev->mtu = ETH_DATA_LEN - GTP_IPV4_MAXLEN; /* Zero header length. 
*/ dev->type = ARPHRD_NONE; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; dev->priv_flags |= IFF_NO_QUEUE; dev->lltx = true; netif_keep_dst(dev); dev->needed_headroom = LL_MAX_HEADER + GTP_IPV4_MAXLEN; gtp->dev = dev; } static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize); static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]); static void gtp_destructor(struct net_device *dev) { struct gtp_dev *gtp = netdev_priv(dev); kfree(gtp->addr_hash); kfree(gtp->tid_hash); } static int gtp_sock_udp_config(struct udp_port_cfg *udp_conf, const struct nlattr *nla, int family) { udp_conf->family = family; switch (udp_conf->family) { case AF_INET: udp_conf->local_ip.s_addr = nla_get_be32(nla); break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: udp_conf->local_ip6 = nla_get_in6_addr(nla); break; #endif default: return -EOPNOTSUPP; } return 0; } static struct sock *gtp_create_sock(int type, struct gtp_dev *gtp, const struct nlattr *nla, int family) { struct udp_tunnel_sock_cfg tuncfg = {}; struct udp_port_cfg udp_conf = {}; struct net *net = gtp->net; struct socket *sock; int err; if (nla) { err = gtp_sock_udp_config(&udp_conf, nla, family); if (err < 0) return ERR_PTR(err); } else { udp_conf.local_ip.s_addr = htonl(INADDR_ANY); udp_conf.family = AF_INET; } if (type == UDP_ENCAP_GTP0) udp_conf.local_udp_port = htons(GTP0_PORT); else if (type == UDP_ENCAP_GTP1U) udp_conf.local_udp_port = htons(GTP1U_PORT); else return ERR_PTR(-EINVAL); err = udp_sock_create(net, &udp_conf, &sock); if (err) return ERR_PTR(err); tuncfg.sk_user_data = gtp; tuncfg.encap_type = type; tuncfg.encap_rcv = gtp_encap_recv; tuncfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tuncfg); return sock->sk; } static int gtp_create_sockets(struct gtp_dev *gtp, const struct nlattr *nla, int family) { struct sock *sk1u; struct sock *sk0; sk0 = gtp_create_sock(UDP_ENCAP_GTP0, gtp, nla, family); if (IS_ERR(sk0)) return PTR_ERR(sk0); sk1u = gtp_create_sock(UDP_ENCAP_GTP1U, gtp, nla, family); if (IS_ERR(sk1u)) { udp_tunnel_sock_release(sk0->sk_socket); return PTR_ERR(sk1u); } gtp->sk_created = true; gtp->sk0 = sk0; gtp->sk1u = sk1u; return 0; } #define GTP_TH_MAXLEN (sizeof(struct udphdr) + sizeof(struct gtp0_header)) #define GTP_IPV6_MAXLEN (sizeof(struct ipv6hdr) + GTP_TH_MAXLEN) static int gtp_newlink(struct net_device *dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct net *link_net = rtnl_newlink_link_net(params); struct nlattr **data = params->data; unsigned int role = GTP_ROLE_GGSN; struct gtp_dev *gtp; struct gtp_net *gn; int hashsize, err; #if !IS_ENABLED(CONFIG_IPV6) if (data[IFLA_GTP_LOCAL6]) return -EAFNOSUPPORT; #endif gtp = netdev_priv(dev); if (!data[IFLA_GTP_PDP_HASHSIZE]) { hashsize = 1024; } else { hashsize = nla_get_u32(data[IFLA_GTP_PDP_HASHSIZE]); if (!hashsize) hashsize = 1024; } if (data[IFLA_GTP_ROLE]) { role = nla_get_u32(data[IFLA_GTP_ROLE]); if (role > GTP_ROLE_SGSN) return -EINVAL; } gtp->role = role; gtp->restart_count = nla_get_u8_default(data[IFLA_GTP_RESTART_COUNT], 0); gtp->net = link_net; err = gtp_hashtable_new(gtp, hashsize); if (err < 0) return err; if (data[IFLA_GTP_CREATE_SOCKETS]) { if (data[IFLA_GTP_LOCAL6]) err = gtp_create_sockets(gtp, data[IFLA_GTP_LOCAL6], AF_INET6); else err = gtp_create_sockets(gtp, data[IFLA_GTP_LOCAL], AF_INET); } else { err = gtp_encap_enable(gtp, data); } if (err < 0) goto out_hashtable; if ((gtp->sk0 && gtp->sk0->sk_family == AF_INET6) || (gtp->sk1u && 
gtp->sk1u->sk_family == AF_INET6)) { dev->mtu = ETH_DATA_LEN - GTP_IPV6_MAXLEN; dev->needed_headroom = LL_MAX_HEADER + GTP_IPV6_MAXLEN; } err = register_netdevice(dev); if (err < 0) { netdev_dbg(dev, "failed to register new netdev %d\n", err); goto out_encap; } gn = net_generic(link_net, gtp_net_id); list_add(&gtp->list, &gn->gtp_dev_list); dev->priv_destructor = gtp_destructor; netdev_dbg(dev, "registered new GTP interface\n"); return 0; out_encap: gtp_encap_disable(gtp); out_hashtable: kfree(gtp->addr_hash); kfree(gtp->tid_hash); return err; } static void gtp_dellink(struct net_device *dev, struct list_head *head) { struct gtp_dev *gtp = netdev_priv(dev); struct hlist_node *next; struct pdp_ctx *pctx; int i; for (i = 0; i < gtp->hash_size; i++) hlist_for_each_entry_safe(pctx, next, &gtp->tid_hash[i], hlist_tid) pdp_context_delete(pctx); list_del(&gtp->list); unregister_netdevice_queue(dev, head); } static const struct nla_policy gtp_policy[IFLA_GTP_MAX + 1] = { [IFLA_GTP_FD0] = { .type = NLA_U32 }, [IFLA_GTP_FD1] = { .type = NLA_U32 }, [IFLA_GTP_PDP_HASHSIZE] = { .type = NLA_U32 }, [IFLA_GTP_ROLE] = { .type = NLA_U32 }, [IFLA_GTP_CREATE_SOCKETS] = { .type = NLA_U8 }, [IFLA_GTP_RESTART_COUNT] = { .type = NLA_U8 }, [IFLA_GTP_LOCAL] = { .type = NLA_U32 }, [IFLA_GTP_LOCAL6] = { .len = sizeof(struct in6_addr) }, }; static int gtp_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (!data) return -EINVAL; return 0; } static size_t gtp_get_size(const struct net_device *dev) { return nla_total_size(sizeof(__u32)) + /* IFLA_GTP_PDP_HASHSIZE */ nla_total_size(sizeof(__u32)) + /* IFLA_GTP_ROLE */ nla_total_size(sizeof(__u8)); /* IFLA_GTP_RESTART_COUNT */ } static int gtp_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct gtp_dev *gtp = netdev_priv(dev); if (nla_put_u32(skb, IFLA_GTP_PDP_HASHSIZE, gtp->hash_size)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_GTP_ROLE, gtp->role)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_GTP_RESTART_COUNT, gtp->restart_count)) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } static struct rtnl_link_ops gtp_link_ops __read_mostly = { .kind = "gtp", .maxtype = IFLA_GTP_MAX, .policy = gtp_policy, .priv_size = sizeof(struct gtp_dev), .setup = gtp_link_setup, .validate = gtp_validate, .newlink = gtp_newlink, .dellink = gtp_dellink, .get_size = gtp_get_size, .fill_info = gtp_fill_info, }; static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize) { int i; gtp->addr_hash = kmalloc_array(hsize, sizeof(struct hlist_head), GFP_KERNEL | __GFP_NOWARN); if (gtp->addr_hash == NULL) return -ENOMEM; gtp->tid_hash = kmalloc_array(hsize, sizeof(struct hlist_head), GFP_KERNEL | __GFP_NOWARN); if (gtp->tid_hash == NULL) goto err1; gtp->hash_size = hsize; for (i = 0; i < hsize; i++) { INIT_HLIST_HEAD(&gtp->addr_hash[i]); INIT_HLIST_HEAD(&gtp->tid_hash[i]); } return 0; err1: kfree(gtp->addr_hash); return -ENOMEM; } static struct sock *gtp_encap_enable_socket(int fd, int type, struct gtp_dev *gtp) { struct udp_tunnel_sock_cfg tuncfg = {NULL}; struct socket *sock; struct sock *sk; int err; pr_debug("enable gtp on %d, %d\n", fd, type); sock = sockfd_lookup(fd, &err); if (!sock) { pr_debug("gtp socket fd=%d not found\n", fd); return ERR_PTR(err); } sk = sock->sk; if (sk->sk_protocol != IPPROTO_UDP || sk->sk_type != SOCK_DGRAM || (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)) { pr_debug("socket fd=%d not UDP\n", fd); sk = ERR_PTR(-EINVAL); goto out_sock; } if (sk->sk_family == AF_INET6 && 
!sk->sk_ipv6only) { sk = ERR_PTR(-EADDRNOTAVAIL); goto out_sock; } lock_sock(sk); if (sk->sk_user_data) { sk = ERR_PTR(-EBUSY); goto out_rel_sock; } sock_hold(sk); tuncfg.sk_user_data = gtp; tuncfg.encap_type = type; tuncfg.encap_rcv = gtp_encap_recv; tuncfg.encap_destroy = gtp_encap_destroy; setup_udp_tunnel_sock(sock_net(sock->sk), sock, &tuncfg); out_rel_sock: release_sock(sock->sk); out_sock: sockfd_put(sock); return sk; } static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]) { struct sock *sk1u = NULL; struct sock *sk0 = NULL; if (!data[IFLA_GTP_FD0] && !data[IFLA_GTP_FD1]) return -EINVAL; if (data[IFLA_GTP_FD0]) { int fd0 = nla_get_u32(data[IFLA_GTP_FD0]); if (fd0 >= 0) { sk0 = gtp_encap_enable_socket(fd0, UDP_ENCAP_GTP0, gtp); if (IS_ERR(sk0)) return PTR_ERR(sk0); } } if (data[IFLA_GTP_FD1]) { int fd1 = nla_get_u32(data[IFLA_GTP_FD1]); if (fd1 >= 0) { sk1u = gtp_encap_enable_socket(fd1, UDP_ENCAP_GTP1U, gtp); if (IS_ERR(sk1u)) { gtp_encap_disable_sock(sk0); return PTR_ERR(sk1u); } } } gtp->sk0 = sk0; gtp->sk1u = sk1u; if (sk0 && sk1u && sk0->sk_family != sk1u->sk_family) { gtp_encap_disable_sock(sk0); gtp_encap_disable_sock(sk1u); return -EINVAL; } return 0; } static struct gtp_dev *gtp_find_dev(struct net *src_net, struct nlattr *nla[]) { struct gtp_dev *gtp = NULL; struct net_device *dev; struct net *net; /* Examine the link attributes and figure out which network namespace * we are talking about. */ if (nla[GTPA_NET_NS_FD]) net = get_net_ns_by_fd(nla_get_u32(nla[GTPA_NET_NS_FD])); else net = get_net(src_net); if (IS_ERR(net)) return NULL; /* Check if there's an existing gtpX device to configure */ dev = dev_get_by_index_rcu(net, nla_get_u32(nla[GTPA_LINK])); if (dev && dev->netdev_ops == &gtp_netdev_ops) gtp = netdev_priv(dev); put_net(net); return gtp; } static void gtp_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info) { pctx->gtp_version = nla_get_u32(info->attrs[GTPA_VERSION]); switch (pctx->gtp_version) { case GTP_V0: /* According to TS 09.60, sections 7.5.1 and 7.5.2, the flow * label needs to be the same for uplink and downlink packets, * so let's annotate this. 
*/ pctx->u.v0.tid = nla_get_u64(info->attrs[GTPA_TID]); pctx->u.v0.flow = nla_get_u16(info->attrs[GTPA_FLOW]); break; case GTP_V1: pctx->u.v1.i_tei = nla_get_u32(info->attrs[GTPA_I_TEI]); pctx->u.v1.o_tei = nla_get_u32(info->attrs[GTPA_O_TEI]); break; default: break; } } static void ip_pdp_peer_fill(struct pdp_ctx *pctx, struct genl_info *info) { if (info->attrs[GTPA_PEER_ADDRESS]) { pctx->peer.addr.s_addr = nla_get_be32(info->attrs[GTPA_PEER_ADDRESS]); } else if (info->attrs[GTPA_PEER_ADDR6]) { pctx->peer.addr6 = nla_get_in6_addr(info->attrs[GTPA_PEER_ADDR6]); } } static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info) { ip_pdp_peer_fill(pctx, info); pctx->ms.addr.s_addr = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]); gtp_pdp_fill(pctx, info); } static bool ipv6_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info) { ip_pdp_peer_fill(pctx, info); pctx->ms.addr6 = nla_get_in6_addr(info->attrs[GTPA_MS_ADDR6]); if (pctx->ms.addr6.s6_addr32[2] || pctx->ms.addr6.s6_addr32[3]) return false; gtp_pdp_fill(pctx, info); return true; } static struct pdp_ctx *gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk, struct genl_info *info) { struct pdp_ctx *pctx, *pctx_tid = NULL; struct net_device *dev = gtp->dev; u32 hash_ms, hash_tid = 0; struct in6_addr ms_addr6; unsigned int version; bool found = false; __be32 ms_addr; int family; version = nla_get_u32(info->attrs[GTPA_VERSION]); family = nla_get_u8_default(info->attrs[GTPA_FAMILY], AF_INET); #if !IS_ENABLED(CONFIG_IPV6) if (family == AF_INET6) return ERR_PTR(-EAFNOSUPPORT); #endif if (!info->attrs[GTPA_PEER_ADDRESS] && !info->attrs[GTPA_PEER_ADDR6]) return ERR_PTR(-EINVAL); if ((info->attrs[GTPA_PEER_ADDRESS] && sk->sk_family == AF_INET6) || (info->attrs[GTPA_PEER_ADDR6] && sk->sk_family == AF_INET)) return ERR_PTR(-EAFNOSUPPORT); switch (family) { case AF_INET: if (!info->attrs[GTPA_MS_ADDRESS] || info->attrs[GTPA_MS_ADDR6]) return ERR_PTR(-EINVAL); ms_addr = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]); hash_ms = ipv4_hashfn(ms_addr) % gtp->hash_size; pctx = ipv4_pdp_find(gtp, ms_addr); break; case AF_INET6: if (!info->attrs[GTPA_MS_ADDR6] || info->attrs[GTPA_MS_ADDRESS]) return ERR_PTR(-EINVAL); ms_addr6 = nla_get_in6_addr(info->attrs[GTPA_MS_ADDR6]); hash_ms = ipv6_hashfn(&ms_addr6) % gtp->hash_size; pctx = ipv6_pdp_find(gtp, &ms_addr6); break; default: return ERR_PTR(-EAFNOSUPPORT); } if (pctx) found = true; if (version == GTP_V0) pctx_tid = gtp0_pdp_find(gtp, nla_get_u64(info->attrs[GTPA_TID]), family); else if (version == GTP_V1) pctx_tid = gtp1_pdp_find(gtp, nla_get_u32(info->attrs[GTPA_I_TEI]), family); if (pctx_tid) found = true; if (found) { if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) return ERR_PTR(-EEXIST); if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE) return ERR_PTR(-EOPNOTSUPP); if (pctx && pctx_tid) return ERR_PTR(-EEXIST); if (!pctx) pctx = pctx_tid; switch (pctx->af) { case AF_INET: ipv4_pdp_fill(pctx, info); break; case AF_INET6: if (!ipv6_pdp_fill(pctx, info)) return ERR_PTR(-EADDRNOTAVAIL); break; } if (pctx->gtp_version == GTP_V0) netdev_dbg(dev, "GTPv0-U: update tunnel id = %llx (pdp %p)\n", pctx->u.v0.tid, pctx); else if (pctx->gtp_version == GTP_V1) netdev_dbg(dev, "GTPv1-U: update tunnel id = %x/%x (pdp %p)\n", pctx->u.v1.i_tei, pctx->u.v1.o_tei, pctx); return pctx; } pctx = kmalloc(sizeof(*pctx), GFP_ATOMIC); if (pctx == NULL) return ERR_PTR(-ENOMEM); sock_hold(sk); pctx->sk = sk; pctx->dev = gtp->dev; pctx->af = family; switch (pctx->af) { case AF_INET: if (!info->attrs[GTPA_MS_ADDRESS]) { sock_put(sk); 
kfree(pctx); return ERR_PTR(-EINVAL); } ipv4_pdp_fill(pctx, info); break; case AF_INET6: if (!info->attrs[GTPA_MS_ADDR6]) { sock_put(sk); kfree(pctx); return ERR_PTR(-EINVAL); } if (!ipv6_pdp_fill(pctx, info)) { sock_put(sk); kfree(pctx); return ERR_PTR(-EADDRNOTAVAIL); } break; } atomic_set(&pctx->tx_seq, 0); switch (pctx->gtp_version) { case GTP_V0: /* TS 09.60: "The flow label identifies unambiguously a GTP * flow.". We use the tid for this instead, I cannot find a * situation in which this doesn't unambiguosly identify the * PDP context. */ hash_tid = gtp0_hashfn(pctx->u.v0.tid) % gtp->hash_size; break; case GTP_V1: hash_tid = gtp1u_hashfn(pctx->u.v1.i_tei) % gtp->hash_size; break; } hlist_add_head_rcu(&pctx->hlist_addr, &gtp->addr_hash[hash_ms]); hlist_add_head_rcu(&pctx->hlist_tid, &gtp->tid_hash[hash_tid]); switch (pctx->gtp_version) { case GTP_V0: netdev_dbg(dev, "GTPv0-U: new PDP ctx id=%llx ssgn=%pI4 ms=%pI4 (pdp=%p)\n", pctx->u.v0.tid, &pctx->peer.addr, &pctx->ms.addr, pctx); break; case GTP_V1: netdev_dbg(dev, "GTPv1-U: new PDP ctx id=%x/%x ssgn=%pI4 ms=%pI4 (pdp=%p)\n", pctx->u.v1.i_tei, pctx->u.v1.o_tei, &pctx->peer.addr, &pctx->ms.addr, pctx); break; } return pctx; } static void pdp_context_free(struct rcu_head *head) { struct pdp_ctx *pctx = container_of(head, struct pdp_ctx, rcu_head); sock_put(pctx->sk); kfree(pctx); } static void pdp_context_delete(struct pdp_ctx *pctx) { hlist_del_rcu(&pctx->hlist_tid); hlist_del_rcu(&pctx->hlist_addr); call_rcu(&pctx->rcu_head, pdp_context_free); } static int gtp_tunnel_notify(struct pdp_ctx *pctx, u8 cmd, gfp_t allocation); static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info) { unsigned int version; struct pdp_ctx *pctx; struct gtp_dev *gtp; struct sock *sk; int err; if (!info->attrs[GTPA_VERSION] || !info->attrs[GTPA_LINK]) return -EINVAL; version = nla_get_u32(info->attrs[GTPA_VERSION]); switch (version) { case GTP_V0: if (!info->attrs[GTPA_TID] || !info->attrs[GTPA_FLOW]) return -EINVAL; break; case GTP_V1: if (!info->attrs[GTPA_I_TEI] || !info->attrs[GTPA_O_TEI]) return -EINVAL; break; default: return -EINVAL; } rtnl_lock(); gtp = gtp_find_dev(sock_net(skb->sk), info->attrs); if (!gtp) { err = -ENODEV; goto out_unlock; } if (version == GTP_V0) sk = gtp->sk0; else if (version == GTP_V1) sk = gtp->sk1u; else sk = NULL; if (!sk) { err = -ENODEV; goto out_unlock; } pctx = gtp_pdp_add(gtp, sk, info); if (IS_ERR(pctx)) { err = PTR_ERR(pctx); } else { gtp_tunnel_notify(pctx, GTP_CMD_NEWPDP, GFP_KERNEL); err = 0; } out_unlock: rtnl_unlock(); return err; } static struct pdp_ctx *gtp_find_pdp_by_link(struct net *net, struct nlattr *nla[]) { struct gtp_dev *gtp; int family; family = nla_get_u8_default(nla[GTPA_FAMILY], AF_INET); gtp = gtp_find_dev(net, nla); if (!gtp) return ERR_PTR(-ENODEV); if (nla[GTPA_MS_ADDRESS]) { __be32 ip = nla_get_be32(nla[GTPA_MS_ADDRESS]); if (family != AF_INET) return ERR_PTR(-EINVAL); return ipv4_pdp_find(gtp, ip); } else if (nla[GTPA_MS_ADDR6]) { struct in6_addr addr = nla_get_in6_addr(nla[GTPA_MS_ADDR6]); if (family != AF_INET6) return ERR_PTR(-EINVAL); if (addr.s6_addr32[2] || addr.s6_addr32[3]) return ERR_PTR(-EADDRNOTAVAIL); return ipv6_pdp_find(gtp, &addr); } else if (nla[GTPA_VERSION]) { u32 gtp_version = nla_get_u32(nla[GTPA_VERSION]); if (gtp_version == GTP_V0 && nla[GTPA_TID]) { return gtp0_pdp_find(gtp, nla_get_u64(nla[GTPA_TID]), family); } else if (gtp_version == GTP_V1 && nla[GTPA_I_TEI]) { return gtp1_pdp_find(gtp, nla_get_u32(nla[GTPA_I_TEI]), family); } } return 
ERR_PTR(-EINVAL); } static struct pdp_ctx *gtp_find_pdp(struct net *net, struct nlattr *nla[]) { struct pdp_ctx *pctx; if (nla[GTPA_LINK]) pctx = gtp_find_pdp_by_link(net, nla); else pctx = ERR_PTR(-EINVAL); if (!pctx) pctx = ERR_PTR(-ENOENT); return pctx; } static int gtp_genl_del_pdp(struct sk_buff *skb, struct genl_info *info) { struct pdp_ctx *pctx; int err = 0; if (!info->attrs[GTPA_VERSION]) return -EINVAL; rcu_read_lock(); pctx = gtp_find_pdp(sock_net(skb->sk), info->attrs); if (IS_ERR(pctx)) { err = PTR_ERR(pctx); goto out_unlock; } if (pctx->gtp_version == GTP_V0) netdev_dbg(pctx->dev, "GTPv0-U: deleting tunnel id = %llx (pdp %p)\n", pctx->u.v0.tid, pctx); else if (pctx->gtp_version == GTP_V1) netdev_dbg(pctx->dev, "GTPv1-U: deleting tunnel id = %x/%x (pdp %p)\n", pctx->u.v1.i_tei, pctx->u.v1.o_tei, pctx); gtp_tunnel_notify(pctx, GTP_CMD_DELPDP, GFP_ATOMIC); pdp_context_delete(pctx); out_unlock: rcu_read_unlock(); return err; } static int gtp_genl_fill_info(struct sk_buff *skb, u32 snd_portid, u32 snd_seq, int flags, u32 type, struct pdp_ctx *pctx) { void *genlh; genlh = genlmsg_put(skb, snd_portid, snd_seq, &gtp_genl_family, flags, type); if (genlh == NULL) goto nlmsg_failure; if (nla_put_u32(skb, GTPA_VERSION, pctx->gtp_version) || nla_put_u32(skb, GTPA_LINK, pctx->dev->ifindex) || nla_put_u8(skb, GTPA_FAMILY, pctx->af)) goto nla_put_failure; switch (pctx->af) { case AF_INET: if (nla_put_be32(skb, GTPA_MS_ADDRESS, pctx->ms.addr.s_addr)) goto nla_put_failure; break; case AF_INET6: if (nla_put_in6_addr(skb, GTPA_MS_ADDR6, &pctx->ms.addr6)) goto nla_put_failure; break; } switch (pctx->sk->sk_family) { case AF_INET: if (nla_put_be32(skb, GTPA_PEER_ADDRESS, pctx->peer.addr.s_addr)) goto nla_put_failure; break; case AF_INET6: if (nla_put_in6_addr(skb, GTPA_PEER_ADDR6, &pctx->peer.addr6)) goto nla_put_failure; break; } switch (pctx->gtp_version) { case GTP_V0: if (nla_put_u64_64bit(skb, GTPA_TID, pctx->u.v0.tid, GTPA_PAD) || nla_put_u16(skb, GTPA_FLOW, pctx->u.v0.flow)) goto nla_put_failure; break; case GTP_V1: if (nla_put_u32(skb, GTPA_I_TEI, pctx->u.v1.i_tei) || nla_put_u32(skb, GTPA_O_TEI, pctx->u.v1.o_tei)) goto nla_put_failure; break; } genlmsg_end(skb, genlh); return 0; nlmsg_failure: nla_put_failure: genlmsg_cancel(skb, genlh); return -EMSGSIZE; } static int gtp_tunnel_notify(struct pdp_ctx *pctx, u8 cmd, gfp_t allocation) { struct sk_buff *msg; int ret; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, allocation); if (!msg) return -ENOMEM; ret = gtp_genl_fill_info(msg, 0, 0, 0, cmd, pctx); if (ret < 0) { nlmsg_free(msg); return ret; } ret = genlmsg_multicast_netns(&gtp_genl_family, dev_net(pctx->dev), msg, 0, GTP_GENL_MCGRP, GFP_ATOMIC); return ret; } static int gtp_genl_get_pdp(struct sk_buff *skb, struct genl_info *info) { struct pdp_ctx *pctx = NULL; struct sk_buff *skb2; int err; if (!info->attrs[GTPA_VERSION]) return -EINVAL; rcu_read_lock(); pctx = gtp_find_pdp(sock_net(skb->sk), info->attrs); if (IS_ERR(pctx)) { err = PTR_ERR(pctx); goto err_unlock; } skb2 = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); if (skb2 == NULL) { err = -ENOMEM; goto err_unlock; } err = gtp_genl_fill_info(skb2, NETLINK_CB(skb).portid, info->snd_seq, 0, info->nlhdr->nlmsg_type, pctx); if (err < 0) goto err_unlock_free; rcu_read_unlock(); return genlmsg_unicast(genl_info_net(info), skb2, info->snd_portid); err_unlock_free: kfree_skb(skb2); err_unlock: rcu_read_unlock(); return err; } static int gtp_genl_dump_pdp(struct sk_buff *skb, struct netlink_callback *cb) { struct gtp_dev *last_gtp = (struct gtp_dev 
*)cb->args[2], *gtp; int i, j, bucket = cb->args[0], skip = cb->args[1]; struct net *net = sock_net(skb->sk); struct net_device *dev; struct pdp_ctx *pctx; if (cb->args[4]) return 0; rcu_read_lock(); for_each_netdev_rcu(net, dev) { if (dev->rtnl_link_ops != &gtp_link_ops) continue; gtp = netdev_priv(dev); if (last_gtp && last_gtp != gtp) continue; else last_gtp = NULL; for (i = bucket; i < gtp->hash_size; i++) { j = 0; hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid) { if (j >= skip && gtp_genl_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh->nlmsg_type, pctx)) { cb->args[0] = i; cb->args[1] = j; cb->args[2] = (unsigned long)gtp; goto out; } j++; } skip = 0; } bucket = 0; } cb->args[4] = 1; out: rcu_read_unlock(); return skb->len; } static int gtp_genl_send_echo_req(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *skb_to_send; __be32 src_ip, dst_ip; unsigned int version; struct gtp_dev *gtp; struct flowi4 fl4; struct rtable *rt; struct sock *sk; __be16 port; int len; if (!info->attrs[GTPA_VERSION] || !info->attrs[GTPA_LINK] || !info->attrs[GTPA_PEER_ADDRESS] || !info->attrs[GTPA_MS_ADDRESS]) return -EINVAL; version = nla_get_u32(info->attrs[GTPA_VERSION]); dst_ip = nla_get_be32(info->attrs[GTPA_PEER_ADDRESS]); src_ip = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]); gtp = gtp_find_dev(sock_net(skb->sk), info->attrs); if (!gtp) return -ENODEV; if (!gtp->sk_created) return -EOPNOTSUPP; if (!(gtp->dev->flags & IFF_UP)) return -ENETDOWN; if (version == GTP_V0) { struct gtp0_header *gtp0_h; len = LL_RESERVED_SPACE(gtp->dev) + sizeof(struct gtp0_header) + sizeof(struct iphdr) + sizeof(struct udphdr); skb_to_send = netdev_alloc_skb_ip_align(gtp->dev, len); if (!skb_to_send) return -ENOMEM; sk = gtp->sk0; port = htons(GTP0_PORT); gtp0_h = skb_push(skb_to_send, sizeof(struct gtp0_header)); memset(gtp0_h, 0, sizeof(struct gtp0_header)); gtp0_build_echo_msg(gtp0_h, GTP_ECHO_REQ); } else if (version == GTP_V1) { struct gtp1_header_long *gtp1u_h; len = LL_RESERVED_SPACE(gtp->dev) + sizeof(struct gtp1_header_long) + sizeof(struct iphdr) + sizeof(struct udphdr); skb_to_send = netdev_alloc_skb_ip_align(gtp->dev, len); if (!skb_to_send) return -ENOMEM; sk = gtp->sk1u; port = htons(GTP1U_PORT); gtp1u_h = skb_push(skb_to_send, sizeof(struct gtp1_header_long)); memset(gtp1u_h, 0, sizeof(struct gtp1_header_long)); gtp1u_build_echo_msg(gtp1u_h, GTP_ECHO_REQ); } else { return -ENODEV; } rt = ip4_route_output_gtp(&fl4, sk, dst_ip, src_ip); if (IS_ERR(rt)) { netdev_dbg(gtp->dev, "no route for echo request to %pI4\n", &dst_ip); kfree_skb(skb_to_send); return -ENODEV; } udp_tunnel_xmit_skb(rt, sk, skb_to_send, fl4.saddr, fl4.daddr, inet_dscp_to_dsfield(fl4.flowi4_dscp), ip4_dst_hoplimit(&rt->dst), 0, port, port, !net_eq(sock_net(sk), dev_net(gtp->dev)), false, 0); return 0; } static const struct nla_policy gtp_genl_policy[GTPA_MAX + 1] = { [GTPA_LINK] = { .type = NLA_U32, }, [GTPA_VERSION] = { .type = NLA_U32, }, [GTPA_TID] = { .type = NLA_U64, }, [GTPA_PEER_ADDRESS] = { .type = NLA_U32, }, [GTPA_MS_ADDRESS] = { .type = NLA_U32, }, [GTPA_FLOW] = { .type = NLA_U16, }, [GTPA_NET_NS_FD] = { .type = NLA_U32, }, [GTPA_I_TEI] = { .type = NLA_U32, }, [GTPA_O_TEI] = { .type = NLA_U32, }, [GTPA_PEER_ADDR6] = { .len = sizeof(struct in6_addr), }, [GTPA_MS_ADDR6] = { .len = sizeof(struct in6_addr), }, [GTPA_FAMILY] = { .type = NLA_U8, }, }; static const struct genl_small_ops gtp_genl_ops[] = { { .cmd = GTP_CMD_NEWPDP, .validate = GENL_DONT_VALIDATE_STRICT | 
GENL_DONT_VALIDATE_DUMP, .doit = gtp_genl_new_pdp, .flags = GENL_ADMIN_PERM, }, { .cmd = GTP_CMD_DELPDP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = gtp_genl_del_pdp, .flags = GENL_ADMIN_PERM, }, { .cmd = GTP_CMD_GETPDP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = gtp_genl_get_pdp, .dumpit = gtp_genl_dump_pdp, .flags = GENL_ADMIN_PERM, }, { .cmd = GTP_CMD_ECHOREQ, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = gtp_genl_send_echo_req, .flags = GENL_ADMIN_PERM, }, }; static struct genl_family gtp_genl_family __ro_after_init = { .name = "gtp", .version = 0, .hdrsize = 0, .maxattr = GTPA_MAX, .policy = gtp_genl_policy, .netnsok = true, .module = THIS_MODULE, .small_ops = gtp_genl_ops, .n_small_ops = ARRAY_SIZE(gtp_genl_ops), .resv_start_op = GTP_CMD_ECHOREQ + 1, .mcgrps = gtp_genl_mcgrps, .n_mcgrps = ARRAY_SIZE(gtp_genl_mcgrps), }; static int __net_init gtp_net_init(struct net *net) { struct gtp_net *gn = net_generic(net, gtp_net_id); INIT_LIST_HEAD(&gn->gtp_dev_list); return 0; } static void __net_exit gtp_net_exit_rtnl(struct net *net, struct list_head *dev_to_kill) { struct gtp_net *gn = net_generic(net, gtp_net_id); struct gtp_dev *gtp, *gtp_next; list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list) gtp_dellink(gtp->dev, dev_to_kill); } static struct pernet_operations gtp_net_ops = { .init = gtp_net_init, .exit_rtnl = gtp_net_exit_rtnl, .id = &gtp_net_id, .size = sizeof(struct gtp_net), }; static int __init gtp_init(void) { int err; get_random_bytes(&gtp_h_initval, sizeof(gtp_h_initval)); err = register_pernet_subsys(&gtp_net_ops); if (err < 0) goto error_out; err = rtnl_link_register(&gtp_link_ops); if (err < 0) goto unreg_pernet_subsys; err = genl_register_family(&gtp_genl_family); if (err < 0) goto unreg_rtnl_link; pr_info("GTP module loaded (pdp ctx size %zd bytes)\n", sizeof(struct pdp_ctx)); return 0; unreg_rtnl_link: rtnl_link_unregister(&gtp_link_ops); unreg_pernet_subsys: unregister_pernet_subsys(&gtp_net_ops); error_out: pr_err("error loading GTP module loaded\n"); return err; } late_initcall(gtp_init); static void __exit gtp_fini(void) { genl_unregister_family(&gtp_genl_family); rtnl_link_unregister(&gtp_link_ops); unregister_pernet_subsys(&gtp_net_ops); pr_info("GTP module unloaded\n"); } module_exit(gtp_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <hwelte@sysmocom.de>"); MODULE_DESCRIPTION("Interface driver for GTP encapsulated traffic"); MODULE_ALIAS_RTNL_LINK("gtp"); MODULE_ALIAS_GENL_FAMILY("gtp");
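/*
 * Illustrative sketch, not part of the original driver: the netlink
 * attribute set that gtp_genl_new_pdp() above requires.  GTPA_VERSION and
 * GTPA_LINK are always mandatory; GTP_V0 additionally needs GTPA_TID and
 * GTPA_FLOW, while GTP_V1 needs GTPA_I_TEI and GTPA_O_TEI.  The helper
 * below is hypothetical and only shows the nla_put_*() calls a GTPv1-U
 * control plane would emit when building a GTP_CMD_NEWPDP request; error
 * handling is omitted.
 */
#if 0	/* example only, not built */
static void example_fill_newpdp_v1(struct sk_buff *skb, u32 gtp_ifindex,
				   u32 i_tei, u32 o_tei, __be32 ms_addr)
{
	nla_put_u32(skb, GTPA_VERSION, GTP_V1);		/* mandatory */
	nla_put_u32(skb, GTPA_LINK, gtp_ifindex);	/* mandatory: gtp device */
	nla_put_u32(skb, GTPA_I_TEI, i_tei);		/* GTPv1 ingress TEID */
	nla_put_u32(skb, GTPA_O_TEI, o_tei);		/* GTPv1 egress TEID */
	nla_put_be32(skb, GTPA_MS_ADDRESS, ms_addr);	/* subscriber IPv4 */
}
#endif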
// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) /* Copyright(c) 2014 - 2020 Intel Corporation */ #include <linux/mutex.h> #include <linux/list.h> #include "adf_cfg.h" #include "adf_common_drv.h" static LIST_HEAD(accel_table); static LIST_HEAD(vfs_table); static DEFINE_MUTEX(table_lock); static u32 num_devices; static u8 id_map[ADF_MAX_DEVICES]; struct vf_id_map { u32 bdf; u32 id; u32 fake_id; bool attached; struct list_head list; }; static int adf_get_vf_id(struct adf_accel_dev *vf) { return ((7 * (PCI_SLOT(accel_to_pci_dev(vf)->devfn) - 1)) + PCI_FUNC(accel_to_pci_dev(vf)->devfn) + (PCI_SLOT(accel_to_pci_dev(vf)->devfn) - 1)); } static int adf_get_vf_num(struct adf_accel_dev *vf) { return (accel_to_pci_dev(vf)->bus->number << 8) | adf_get_vf_id(vf); } static struct vf_id_map *adf_find_vf(u32 bdf) { struct list_head *itr; list_for_each(itr, &vfs_table) { struct vf_id_map *ptr = list_entry(itr, struct vf_id_map, list); if (ptr->bdf == bdf) return ptr; } return NULL; } static int adf_get_vf_real_id(u32 fake) { struct list_head *itr; list_for_each(itr, &vfs_table) { struct vf_id_map *ptr = list_entry(itr, struct vf_id_map, list); if (ptr->fake_id == fake) return ptr->id; } return -1; } /** * adf_clean_vf_map() - Cleans VF id mappings * @vf: flag indicating whether mappings are cleaned * for vfs only or for vfs and pfs * * Function cleans internal ids for virtual functions. 
*/ void adf_clean_vf_map(bool vf) { struct vf_id_map *map; struct list_head *ptr, *tmp; mutex_lock(&table_lock); list_for_each_safe(ptr, tmp, &vfs_table) { map = list_entry(ptr, struct vf_id_map, list); if (map->bdf != -1) { id_map[map->id] = 0; num_devices--; } if (vf && map->bdf == -1) continue; list_del(ptr); kfree(map); } mutex_unlock(&table_lock); } EXPORT_SYMBOL_GPL(adf_clean_vf_map); /** * adf_devmgr_update_class_index() - Update internal index * @hw_data: Pointer to internal device data. * * Function updates internal dev index for VFs */ void adf_devmgr_update_class_index(struct adf_hw_device_data *hw_data) { struct adf_hw_device_class *class = hw_data->dev_class; struct list_head *itr; int i = 0; list_for_each(itr, &accel_table) { struct adf_accel_dev *ptr = list_entry(itr, struct adf_accel_dev, list); if (ptr->hw_device->dev_class == class) ptr->hw_device->instance_id = i++; if (i == class->instances) break; } } EXPORT_SYMBOL_GPL(adf_devmgr_update_class_index); static unsigned int adf_find_free_id(void) { unsigned int i; for (i = 0; i < ADF_MAX_DEVICES; i++) { if (!id_map[i]) { id_map[i] = 1; return i; } } return ADF_MAX_DEVICES + 1; } /** * adf_devmgr_add_dev() - Add accel_dev to the acceleration framework * @accel_dev: Pointer to acceleration device. * @pf: Corresponding PF if the accel_dev is a VF * * Function adds acceleration device to the acceleration framework. * To be used by QAT device specific drivers. * * Return: 0 on success, error code otherwise. */ int adf_devmgr_add_dev(struct adf_accel_dev *accel_dev, struct adf_accel_dev *pf) { struct list_head *itr; int ret = 0; if (num_devices == ADF_MAX_DEVICES) { dev_err(&GET_DEV(accel_dev), "Only support up to %d devices\n", ADF_MAX_DEVICES); return -EFAULT; } mutex_lock(&table_lock); atomic_set(&accel_dev->ref_count, 0); /* PF on host or VF on guest - optimized to remove redundant is_vf */ if (!accel_dev->is_vf || !pf) { struct vf_id_map *map; list_for_each(itr, &accel_table) { struct adf_accel_dev *ptr = list_entry(itr, struct adf_accel_dev, list); if (ptr == accel_dev) { ret = -EEXIST; goto unlock; } } list_add_tail(&accel_dev->list, &accel_table); accel_dev->accel_id = adf_find_free_id(); if (accel_dev->accel_id > ADF_MAX_DEVICES) { ret = -EFAULT; goto unlock; } num_devices++; map = kzalloc(sizeof(*map), GFP_KERNEL); if (!map) { ret = -ENOMEM; goto unlock; } map->bdf = ~0; map->id = accel_dev->accel_id; map->fake_id = map->id; map->attached = true; list_add_tail(&map->list, &vfs_table); } else if (accel_dev->is_vf && pf) { /* VF on host */ struct vf_id_map *map; map = adf_find_vf(adf_get_vf_num(accel_dev)); if (map) { struct vf_id_map *next; accel_dev->accel_id = map->id; list_add_tail(&accel_dev->list, &accel_table); map->fake_id++; map->attached = true; next = list_next_entry(map, list); while (next && &next->list != &vfs_table) { next->fake_id++; next = list_next_entry(next, list); } ret = 0; goto unlock; } map = kzalloc(sizeof(*map), GFP_KERNEL); if (!map) { ret = -ENOMEM; goto unlock; } accel_dev->accel_id = adf_find_free_id(); if (accel_dev->accel_id > ADF_MAX_DEVICES) { kfree(map); ret = -EFAULT; goto unlock; } num_devices++; list_add_tail(&accel_dev->list, &accel_table); map->bdf = adf_get_vf_num(accel_dev); map->id = accel_dev->accel_id; map->fake_id = map->id; map->attached = true; list_add_tail(&map->list, &vfs_table); } mutex_init(&accel_dev->state_lock); unlock: mutex_unlock(&table_lock); return ret; } EXPORT_SYMBOL_GPL(adf_devmgr_add_dev); struct list_head *adf_devmgr_get_head(void) { return &accel_table; } 
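/*
 * Illustrative sketch, not part of the original file: how a QAT device
 * driver is expected to pair adf_devmgr_add_dev() above with
 * adf_devmgr_rm_dev() around probe/remove.  example_register() is
 * hypothetical; a PF (or a VF on a guest) passes NULL for @pf, while a VF
 * on the host passes its parent PF so the vf_id_map bookkeeping is updated.
 */
#if 0	/* example only, not built */
static int example_register(struct adf_accel_dev *accel_dev,
			    struct adf_accel_dev *pf)
{
	int ret;

	ret = adf_devmgr_add_dev(accel_dev, pf);	/* assigns accel_id */
	if (ret)
		return ret;

	/* ... bring the device up ... */

	return 0;
}
#endif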
/** * adf_devmgr_rm_dev() - Remove accel_dev from the acceleration framework. * @accel_dev: Pointer to acceleration device. * @pf: Corresponding PF if the accel_dev is a VF * * Function removes acceleration device from the acceleration framework. * To be used by QAT device specific drivers. * * Return: void */ void adf_devmgr_rm_dev(struct adf_accel_dev *accel_dev, struct adf_accel_dev *pf) { mutex_lock(&table_lock); /* PF on host or VF on guest - optimized to remove redundant is_vf */ if (!accel_dev->is_vf || !pf) { id_map[accel_dev->accel_id] = 0; num_devices--; } else if (accel_dev->is_vf && pf) { struct vf_id_map *map, *next; map = adf_find_vf(adf_get_vf_num(accel_dev)); if (!map) { dev_err(&GET_DEV(accel_dev), "Failed to find VF map\n"); goto unlock; } map->fake_id--; map->attached = false; next = list_next_entry(map, list); while (next && &next->list != &vfs_table) { next->fake_id--; next = list_next_entry(next, list); } } unlock: mutex_destroy(&accel_dev->state_lock); list_del(&accel_dev->list); mutex_unlock(&table_lock); } EXPORT_SYMBOL_GPL(adf_devmgr_rm_dev); /** * adf_devmgr_pci_to_accel_dev() - Get accel_dev associated with the pci_dev. * @pci_dev: Pointer to PCI device. * * Function returns acceleration device associated with the given PCI device. * To be used by QAT device specific drivers. * * Return: pointer to accel_dev or NULL if not found. */ struct adf_accel_dev *adf_devmgr_pci_to_accel_dev(struct pci_dev *pci_dev) { struct list_head *itr; mutex_lock(&table_lock); list_for_each(itr, &accel_table) { struct adf_accel_dev *ptr = list_entry(itr, struct adf_accel_dev, list); if (ptr->accel_pci_dev.pci_dev == pci_dev) { mutex_unlock(&table_lock); return ptr; } } mutex_unlock(&table_lock); return NULL; } EXPORT_SYMBOL_GPL(adf_devmgr_pci_to_accel_dev); struct adf_accel_dev *adf_devmgr_get_dev_by_id(u32 id) { struct list_head *itr; int real_id; mutex_lock(&table_lock); real_id = adf_get_vf_real_id(id); if (real_id < 0) goto unlock; id = real_id; list_for_each(itr, &accel_table) { struct adf_accel_dev *ptr = list_entry(itr, struct adf_accel_dev, list); if (ptr->accel_id == id) { mutex_unlock(&table_lock); return ptr; } } unlock: mutex_unlock(&table_lock); return NULL; } int adf_devmgr_verify_id(u32 id) { if (id == ADF_CFG_ALL_DEVICES) return 0; if (adf_devmgr_get_dev_by_id(id)) return 0; return -ENODEV; } static int adf_get_num_dettached_vfs(void) { struct list_head *itr; int vfs = 0; mutex_lock(&table_lock); list_for_each(itr, &vfs_table) { struct vf_id_map *ptr = list_entry(itr, struct vf_id_map, list); if (ptr->bdf != ~0 && !ptr->attached) vfs++; } mutex_unlock(&table_lock); return vfs; } void adf_devmgr_get_num_dev(u32 *num) { *num = num_devices - adf_get_num_dettached_vfs(); } /** * adf_dev_in_use() - Check whether accel_dev is currently in use * @accel_dev: Pointer to acceleration device. * * To be used by QAT device specific drivers. * * Return: 1 when device is in use, 0 otherwise. */ int adf_dev_in_use(struct adf_accel_dev *accel_dev) { return atomic_read(&accel_dev->ref_count) != 0; } EXPORT_SYMBOL_GPL(adf_dev_in_use); /** * adf_dev_get() - Increment accel_dev reference count * @accel_dev: Pointer to acceleration device. * * Increment the accel_dev refcount and if this is the first time * incrementing it during this period the accel_dev is in use, * increment the module refcount too. * To be used by QAT device specific drivers. 
* * Return: 0 when successful, EFAULT when fail to bump module refcount */ int adf_dev_get(struct adf_accel_dev *accel_dev) { if (atomic_add_return(1, &accel_dev->ref_count) == 1) if (!try_module_get(accel_dev->owner)) return -EFAULT; return 0; } EXPORT_SYMBOL_GPL(adf_dev_get); /** * adf_dev_put() - Decrement accel_dev reference count * @accel_dev: Pointer to acceleration device. * * Decrement the accel_dev refcount and if this is the last time * decrementing it during this period the accel_dev is in use, * decrement the module refcount too. * To be used by QAT device specific drivers. * * Return: void */ void adf_dev_put(struct adf_accel_dev *accel_dev) { if (atomic_sub_return(1, &accel_dev->ref_count) == 0) module_put(accel_dev->owner); } EXPORT_SYMBOL_GPL(adf_dev_put); /** * adf_devmgr_in_reset() - Check whether device is in reset * @accel_dev: Pointer to acceleration device. * * To be used by QAT device specific drivers. * * Return: 1 when the device is being reset, 0 otherwise. */ int adf_devmgr_in_reset(struct adf_accel_dev *accel_dev) { return test_bit(ADF_STATUS_RESTARTING, &accel_dev->status); } EXPORT_SYMBOL_GPL(adf_devmgr_in_reset); /** * adf_dev_started() - Check whether device has started * @accel_dev: Pointer to acceleration device. * * To be used by QAT device specific drivers. * * Return: 1 when the device has started, 0 otherwise */ int adf_dev_started(struct adf_accel_dev *accel_dev) { return test_bit(ADF_STATUS_STARTED, &accel_dev->status); } EXPORT_SYMBOL_GPL(adf_dev_started);
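/*
 * Illustrative sketch, not part of the original file: the reference
 * counting pattern adf_dev_get()/adf_dev_put() above implement.  The
 * hypothetical example_use_device() pins both the accel_dev and its owning
 * module for the duration of the work.
 */
#if 0	/* example only, not built */
static int example_use_device(struct adf_accel_dev *accel_dev)
{
	int ret;

	ret = adf_dev_get(accel_dev);	/* fails if the owner module is unloading */
	if (ret)
		return ret;

	/* ... submit work to the device ... */

	adf_dev_put(accel_dev);
	return 0;
}
#endif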
// SPDX-License-Identifier: GPL-2.0-or-later /* * fs/inotify_user.c - inotify support for userspace * * Authors: * John McCutchan <ttb@tentacle.dhs.org> * Robert Love <rml@novell.com> * * Copyright (C) 2005 John McCutchan * Copyright 2006 Hewlett-Packard Development Company, L.P. * * Copyright (C) 2009 Eric Paris <Red Hat Inc> * inotify was largely rewritten to make use of the fsnotify infrastructure */ #include <linux/dcache.h> /* d_unlinked */ #include <linux/fs.h> /* struct inode */ #include <linux/fsnotify_backend.h> #include <linux/inotify.h> #include <linux/path.h> /* struct path */ #include <linux/slab.h> /* kmem_* */ #include <linux/types.h> #include <linux/sched.h> #include <linux/sched/user.h> #include <linux/sched/mm.h> #include "inotify.h" /* * Check if 2 events contain the same information. */ static bool event_compare(struct fsnotify_event *old_fsn, struct fsnotify_event *new_fsn) { struct inotify_event_info *old, *new; old = INOTIFY_E(old_fsn); new = INOTIFY_E(new_fsn); if (old->mask & FS_IN_IGNORED) return false; if ((old->mask == new->mask) && (old->wd == new->wd) && (old->name_len == new->name_len) && (!old->name_len || !strcmp(old->name, new->name))) return true; return false; } static int inotify_merge(struct fsnotify_group *group, struct fsnotify_event *event) { struct list_head *list = &group->notification_list; struct fsnotify_event *last_event; last_event = list_entry(list->prev, struct fsnotify_event, list); return event_compare(last_event, event); } int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask, struct inode *inode, struct inode *dir, const struct qstr *name, u32 cookie) { struct inotify_inode_mark *i_mark; struct inotify_event_info *event; struct fsnotify_event *fsn_event; struct fsnotify_group *group = inode_mark->group; int ret; int len = 0, wd; int alloc_len = sizeof(struct inotify_event_info); struct mem_cgroup *old_memcg; if (name) { len = name->len; alloc_len += len + 1; } pr_debug("%s: group=%p mark=%p mask=%x\n", __func__, group, inode_mark, mask); i_mark = container_of(inode_mark, struct inotify_inode_mark, fsn_mark); /* * We can be racing with mark being detached. Don't report event with * invalid wd. */ wd = READ_ONCE(i_mark->wd); if (wd == -1) return 0; /* * Whoever is interested in the event, pays for the allocation. Do not * trigger OOM killer in the target monitoring memcg as it may have * security repercussion. */ old_memcg = set_active_memcg(group->memcg); event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL); set_active_memcg(old_memcg); if (unlikely(!event)) { /* * Treat lost event due to ENOMEM the same way as queue * overflow to let userspace know event was lost. 
*/ fsnotify_queue_overflow(group); return -ENOMEM; } /* * We now report FS_ISDIR flag with MOVE_SELF and DELETE_SELF events * for fanotify. inotify never reported IN_ISDIR with those events. * It looks like an oversight, but to avoid the risk of breaking * existing inotify programs, mask the flag out from those events. */ if (mask & (IN_MOVE_SELF | IN_DELETE_SELF)) mask &= ~IN_ISDIR; fsn_event = &event->fse; fsnotify_init_event(fsn_event); event->mask = mask; event->wd = wd; event->sync_cookie = cookie; event->name_len = len; if (len) strscpy(event->name, name->name, event->name_len + 1); ret = fsnotify_add_event(group, fsn_event, inotify_merge); if (ret) { /* Our event wasn't used in the end. Free it. */ fsnotify_destroy_event(group, fsn_event); } if (inode_mark->flags & FSNOTIFY_MARK_FLAG_IN_ONESHOT) fsnotify_destroy_mark(inode_mark, group); return 0; } static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group) { inotify_ignored_and_remove_idr(fsn_mark, group); } /* * This is NEVER supposed to be called. Inotify marks should either have been * removed from the idr when the watch was removed or in the * fsnotify_destroy_mark_by_group() call when the inotify instance was being * torn down. This is only called if the idr is about to be freed but there * are still marks in it. */ static int idr_callback(int id, void *p, void *data) { struct fsnotify_mark *fsn_mark; struct inotify_inode_mark *i_mark; static bool warned = false; if (warned) return 0; warned = true; fsn_mark = p; i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); WARN(1, "inotify closing but id=%d for fsn_mark=%p in group=%p still in " "idr. Probably leaking memory\n", id, p, data); /* * I'm taking the liberty of assuming that the mark in question is a * valid address and I'm dereferencing it. This might help to figure * out why we got here and the panic is no worse than the original * BUG() that was here. */ if (fsn_mark) printk(KERN_WARNING "fsn_mark->group=%p wd=%d\n", fsn_mark->group, i_mark->wd); return 0; } static void inotify_free_group_priv(struct fsnotify_group *group) { /* ideally the idr is empty and we won't hit the BUG in the callback */ idr_for_each(&group->inotify_data.idr, idr_callback, group); idr_destroy(&group->inotify_data.idr); if (group->inotify_data.ucounts) dec_inotify_instances(group->inotify_data.ucounts); } static void inotify_free_event(struct fsnotify_group *group, struct fsnotify_event *fsn_event) { kfree(INOTIFY_E(fsn_event)); } /* ding dong the mark is dead */ static void inotify_free_mark(struct fsnotify_mark *fsn_mark) { struct inotify_inode_mark *i_mark; i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); kmem_cache_free(inotify_inode_mark_cachep, i_mark); } const struct fsnotify_ops inotify_fsnotify_ops = { .handle_inode_event = inotify_handle_inode_event, .free_group_priv = inotify_free_group_priv, .free_event = inotify_free_event, .freeing_mark = inotify_freeing_mark, .free_mark = inotify_free_mark, };
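/*
 * Illustrative note, not part of the original file: inotify_merge() above
 * only compares a new event with the last event already on
 * group->notification_list, so identical back-to-back events coalesce.
 * For example, two consecutive IN_MODIFY events for the same watch
 * descriptor and file name reach userspace as a single struct
 * inotify_event, while an intervening event for a different wd keeps both.
 */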
/* SPDX-License-Identifier: GPL-2.0-only */ /* * Supervisor Mode Access Prevention support * * Copyright (C) 2012 Intel Corporation * Author: H. Peter Anvin <hpa@linux.intel.com> */ #ifndef _ASM_X86_SMAP_H #define _ASM_X86_SMAP_H #include <asm/nops.h> #include <asm/cpufeatures.h> #include <asm/alternative.h> #ifdef __ASSEMBLER__ #define ASM_CLAC \ ALTERNATIVE "", "clac", X86_FEATURE_SMAP #define ASM_STAC \ ALTERNATIVE "", "stac", X86_FEATURE_SMAP #else /* __ASSEMBLER__ */ /* * The CLAC/STAC instructions toggle the enforcement of * X86_FEATURE_SMAP along with X86_FEATURE_LASS. * * SMAP enforcement is based on the _PAGE_BIT_USER bit in the page * tables. The kernel is not allowed to touch pages with that bit set * unless the AC bit is set. * * Use stac()/clac() when accessing userspace (_PAGE_USER) mappings, * regardless of location. * * Note: a barrier is implicit in alternative(). */ static __always_inline void clac(void) { alternative("", "clac", X86_FEATURE_SMAP); } static __always_inline void stac(void) { alternative("", "stac", X86_FEATURE_SMAP); } /* * LASS enforcement is based on bit 63 of the virtual address. The * kernel is not allowed to touch memory in the lower half of the * virtual address space. * * Use lass_stac()/lass_clac() to toggle the AC bit for kernel data * accesses (!_PAGE_USER) that are blocked by LASS, but not by SMAP. * * Even with the AC bit set, LASS will continue to block instruction * fetches from the user half of the address space. To allow those, * clear CR4.LASS to disable the LASS mechanism entirely. * * Note: a barrier is implicit in alternative(). */ static __always_inline void lass_clac(void) { alternative("", "clac", X86_FEATURE_LASS); } static __always_inline void lass_stac(void) { alternative("", "stac", X86_FEATURE_LASS); } static __always_inline unsigned long smap_save(void) { unsigned long flags; asm volatile ("# smap_save\n\t" ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "", "pushf; pop %0; clac", X86_FEATURE_SMAP) : "=rm" (flags) : : "memory", "cc"); return flags; } static __always_inline void smap_restore(unsigned long flags) { asm volatile ("# smap_restore\n\t" ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE "", "push %0; popf", X86_FEATURE_SMAP) : : "g" (flags) : "memory", "cc"); } /* These macros can be used in asm() statements */ #define ASM_CLAC \ ALTERNATIVE("", "clac", X86_FEATURE_SMAP) #define ASM_STAC \ ALTERNATIVE("", "stac", X86_FEATURE_SMAP) #define ASM_CLAC_UNSAFE \ ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "clac", X86_FEATURE_SMAP) #define ASM_STAC_UNSAFE \ ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "stac", X86_FEATURE_SMAP) #endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_SMAP_H */
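/*
 * Illustrative sketch, not part of the original header: the stac()/clac()
 * bracketing described above.  example_read_user_byte() is hypothetical
 * and deliberately skips access_ok() and the exception-table plumbing a
 * real uaccess helper needs; it only shows that the access to a
 * _PAGE_USER mapping must sit between stac() and clac().
 */
#if 0	/* example only, not built */
static __always_inline u8 example_read_user_byte(const u8 __user *uptr)
{
	u8 val;

	stac();				/* set EFLAGS.AC: permit user access */
	val = *(const u8 __force *)uptr;	/* simplified; real code uses asm + extable */
	clac();				/* clear EFLAGS.AC again */
	return val;
}
#endif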
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * SM4 Cipher Algorithm, AES-NI/AVX optimized. * as specified in * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html * * Copyright (c) 2021, Alibaba Group. * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> */ #include <asm/fpu/api.h> #include <linux/module.h> #include <linux/crypto.h> #include <linux/export.h> #include <linux/kernel.h> #include <crypto/internal/skcipher.h> #include <crypto/sm4.h> #include "sm4-avx.h" #define SM4_CRYPT8_BLOCK_SIZE (SM4_BLOCK_SIZE * 8) asmlinkage void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst, const u8 *src, int nblocks); asmlinkage void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst, const u8 *src, int nblocks); asmlinkage void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst, const u8 *src, u8 *iv); asmlinkage void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst, const u8 *src, u8 *iv); static int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int key_len) { struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); return sm4_expandkey(ctx, key, key_len); } static int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) { struct skcipher_walk walk; unsigned int nbytes; int err; err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes) > 0) { const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; kernel_fpu_begin(); while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) { sm4_aesni_avx_crypt8(rkey, dst, src, 8); dst += SM4_CRYPT8_BLOCK_SIZE; src += SM4_CRYPT8_BLOCK_SIZE; nbytes -= SM4_CRYPT8_BLOCK_SIZE; } while (nbytes >= SM4_BLOCK_SIZE) { unsigned int nblocks = min(nbytes >> 4, 4u); sm4_aesni_avx_crypt4(rkey, dst, src, nblocks); dst += nblocks * SM4_BLOCK_SIZE; src += nblocks * SM4_BLOCK_SIZE; nbytes -= nblocks * SM4_BLOCK_SIZE; } kernel_fpu_end(); err = skcipher_walk_done(&walk, nbytes); } return err; } int sm4_avx_ecb_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); return ecb_do_crypt(req, ctx->rkey_enc); } EXPORT_SYMBOL_GPL(sm4_avx_ecb_encrypt); int sm4_avx_ecb_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = 
crypto_skcipher_reqtfm(req); struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); return ecb_do_crypt(req, ctx->rkey_dec); } EXPORT_SYMBOL_GPL(sm4_avx_ecb_decrypt); int sm4_cbc_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; unsigned int nbytes; int err; err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes) > 0) { const u8 *iv = walk.iv; const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; while (nbytes >= SM4_BLOCK_SIZE) { crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE); sm4_crypt_block(ctx->rkey_enc, dst, dst); iv = dst; src += SM4_BLOCK_SIZE; dst += SM4_BLOCK_SIZE; nbytes -= SM4_BLOCK_SIZE; } if (iv != walk.iv) memcpy(walk.iv, iv, SM4_BLOCK_SIZE); err = skcipher_walk_done(&walk, nbytes); } return err; } EXPORT_SYMBOL_GPL(sm4_cbc_encrypt); int sm4_avx_cbc_decrypt(struct skcipher_request *req, unsigned int bsize, sm4_crypt_func func) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; unsigned int nbytes; int err; err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes) > 0) { const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; kernel_fpu_begin(); while (nbytes >= bsize) { func(ctx->rkey_dec, dst, src, walk.iv); dst += bsize; src += bsize; nbytes -= bsize; } while (nbytes >= SM4_BLOCK_SIZE) { u8 keystream[SM4_BLOCK_SIZE * 8]; u8 iv[SM4_BLOCK_SIZE]; unsigned int nblocks = min(nbytes >> 4, 8u); int i; sm4_aesni_avx_crypt8(ctx->rkey_dec, keystream, src, nblocks); src += ((int)nblocks - 2) * SM4_BLOCK_SIZE; dst += (nblocks - 1) * SM4_BLOCK_SIZE; memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE); for (i = nblocks - 1; i > 0; i--) { crypto_xor_cpy(dst, src, &keystream[i * SM4_BLOCK_SIZE], SM4_BLOCK_SIZE); src -= SM4_BLOCK_SIZE; dst -= SM4_BLOCK_SIZE; } crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE); memcpy(walk.iv, iv, SM4_BLOCK_SIZE); dst += nblocks * SM4_BLOCK_SIZE; src += (nblocks + 1) * SM4_BLOCK_SIZE; nbytes -= nblocks * SM4_BLOCK_SIZE; } kernel_fpu_end(); err = skcipher_walk_done(&walk, nbytes); } return err; } EXPORT_SYMBOL_GPL(sm4_avx_cbc_decrypt); static int cbc_decrypt(struct skcipher_request *req) { return sm4_avx_cbc_decrypt(req, SM4_CRYPT8_BLOCK_SIZE, sm4_aesni_avx_cbc_dec_blk8); } int sm4_avx_ctr_crypt(struct skcipher_request *req, unsigned int bsize, sm4_crypt_func func) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; unsigned int nbytes; int err; err = skcipher_walk_virt(&walk, req, false); while ((nbytes = walk.nbytes) > 0) { const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; kernel_fpu_begin(); while (nbytes >= bsize) { func(ctx->rkey_enc, dst, src, walk.iv); dst += bsize; src += bsize; nbytes -= bsize; } while (nbytes >= SM4_BLOCK_SIZE) { u8 keystream[SM4_BLOCK_SIZE * 8]; unsigned int nblocks = min(nbytes >> 4, 8u); int i; for (i = 0; i < nblocks; i++) { memcpy(&keystream[i * SM4_BLOCK_SIZE], walk.iv, SM4_BLOCK_SIZE); crypto_inc(walk.iv, SM4_BLOCK_SIZE); } sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream, keystream, nblocks); crypto_xor_cpy(dst, src, keystream, nblocks * SM4_BLOCK_SIZE); dst += nblocks * SM4_BLOCK_SIZE; src += nblocks * SM4_BLOCK_SIZE; nbytes -= nblocks * SM4_BLOCK_SIZE; } kernel_fpu_end(); /* tail */ if (walk.nbytes == walk.total && nbytes > 0) { u8 keystream[SM4_BLOCK_SIZE]; 
memcpy(keystream, walk.iv, SM4_BLOCK_SIZE); crypto_inc(walk.iv, SM4_BLOCK_SIZE); sm4_crypt_block(ctx->rkey_enc, keystream, keystream); crypto_xor_cpy(dst, src, keystream, nbytes); dst += nbytes; src += nbytes; nbytes = 0; } err = skcipher_walk_done(&walk, nbytes); } return err; } EXPORT_SYMBOL_GPL(sm4_avx_ctr_crypt); static int ctr_crypt(struct skcipher_request *req) { return sm4_avx_ctr_crypt(req, SM4_CRYPT8_BLOCK_SIZE, sm4_aesni_avx_ctr_enc_blk8); } static struct skcipher_alg sm4_aesni_avx_skciphers[] = { { .base = { .cra_name = "ecb(sm4)", .cra_driver_name = "ecb-sm4-aesni-avx", .cra_priority = 400, .cra_blocksize = SM4_BLOCK_SIZE, .cra_ctxsize = sizeof(struct sm4_ctx), .cra_module = THIS_MODULE, }, .min_keysize = SM4_KEY_SIZE, .max_keysize = SM4_KEY_SIZE, .walksize = 8 * SM4_BLOCK_SIZE, .setkey = sm4_skcipher_setkey, .encrypt = sm4_avx_ecb_encrypt, .decrypt = sm4_avx_ecb_decrypt, }, { .base = { .cra_name = "cbc(sm4)", .cra_driver_name = "cbc-sm4-aesni-avx", .cra_priority = 400, .cra_blocksize = SM4_BLOCK_SIZE, .cra_ctxsize = sizeof(struct sm4_ctx), .cra_module = THIS_MODULE, }, .min_keysize = SM4_KEY_SIZE, .max_keysize = SM4_KEY_SIZE, .ivsize = SM4_BLOCK_SIZE, .walksize = 8 * SM4_BLOCK_SIZE, .setkey = sm4_skcipher_setkey, .encrypt = sm4_cbc_encrypt, .decrypt = cbc_decrypt, }, { .base = { .cra_name = "ctr(sm4)", .cra_driver_name = "ctr-sm4-aesni-avx", .cra_priority = 400, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct sm4_ctx), .cra_module = THIS_MODULE, }, .min_keysize = SM4_KEY_SIZE, .max_keysize = SM4_KEY_SIZE, .ivsize = SM4_BLOCK_SIZE, .chunksize = SM4_BLOCK_SIZE, .walksize = 8 * SM4_BLOCK_SIZE, .setkey = sm4_skcipher_setkey, .encrypt = ctr_crypt, .decrypt = ctr_crypt, } }; static int __init sm4_init(void) { const char *feature_name; if (!boot_cpu_has(X86_FEATURE_AVX) || !boot_cpu_has(X86_FEATURE_AES) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) { pr_info("AVX or AES-NI instructions are not detected.\n"); return -ENODEV; } if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, &feature_name)) { pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } return crypto_register_skciphers(sm4_aesni_avx_skciphers, ARRAY_SIZE(sm4_aesni_avx_skciphers)); } static void __exit sm4_exit(void) { crypto_unregister_skciphers(sm4_aesni_avx_skciphers, ARRAY_SIZE(sm4_aesni_avx_skciphers)); } module_init(sm4_init); module_exit(sm4_exit); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>"); MODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX optimized"); MODULE_ALIAS_CRYPTO("sm4"); MODULE_ALIAS_CRYPTO("sm4-aesni-avx");
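/*
 * Illustrative sketch, not part of the original file: reaching the
 * "ctr(sm4)" implementation registered above through the generic skcipher
 * API (declared in <crypto/skcipher.h>).  example_sm4_ctr() is
 * hypothetical; scatterlist setup and asynchronous completion handling
 * (crypto_wait_req()) are omitted for brevity.
 */
#if 0	/* example only, not built */
static int example_sm4_ctr(const u8 key[SM4_KEY_SIZE], u8 iv[SM4_BLOCK_SIZE],
			   struct scatterlist *src, struct scatterlist *dst,
			   unsigned int len)
{
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	int err;

	tfm = crypto_alloc_skcipher("ctr(sm4)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, SM4_KEY_SIZE);
	if (err)
		goto out_free_tfm;

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	skcipher_request_set_crypt(req, src, dst, len, iv);
	err = crypto_skcipher_encrypt(req);
	skcipher_request_free(req);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}
#endif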
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) ST-Ericsson AB 2010 * Author: Sjur Brendeland */ #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ #include <linux/filter.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/module.h> #include <linux/sched/signal.h> #include <linux/spinlock.h> #include <linux/mutex.h> #include <linux/list.h> #include <linux/wait.h> #include <linux/poll.h> #include <linux/tcp.h> #include <linux/uaccess.h> #include <linux/debugfs.h> #include <linux/caif/caif_socket.h> #include <linux/pkt_sched.h> #include <net/sock.h> #include <net/tcp_states.h> #include <net/caif/caif_layer.h> #include <net/caif/caif_dev.h> #include <net/caif/cfpkt.h> MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol socket support (AF_CAIF)"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(AF_CAIF); /* * CAIF state is re-using the TCP socket states. * caif_states stored in sk_state reflect the state as reported by * the CAIF stack, while sk_socket->state is the state of the socket. 
*/ enum caif_states { CAIF_CONNECTED = TCP_ESTABLISHED, CAIF_CONNECTING = TCP_SYN_SENT, CAIF_DISCONNECTED = TCP_CLOSE }; #define TX_FLOW_ON_BIT 1 #define RX_FLOW_ON_BIT 2 struct caifsock { struct sock sk; /* must be first member */ struct cflayer layer; unsigned long flow_state; struct caif_connect_request conn_req; struct mutex readlock; struct dentry *debugfs_socket_dir; int headroom, tailroom, maxframe; }; static int rx_flow_is_on(struct caifsock *cf_sk) { return test_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state); } static int tx_flow_is_on(struct caifsock *cf_sk) { return test_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state); } static void set_rx_flow_off(struct caifsock *cf_sk) { clear_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state); } static void set_rx_flow_on(struct caifsock *cf_sk) { set_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state); } static void set_tx_flow_off(struct caifsock *cf_sk) { clear_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state); } static void set_tx_flow_on(struct caifsock *cf_sk) { set_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state); } static void caif_read_lock(struct sock *sk) { struct caifsock *cf_sk; cf_sk = container_of(sk, struct caifsock, sk); mutex_lock(&cf_sk->readlock); } static void caif_read_unlock(struct sock *sk) { struct caifsock *cf_sk; cf_sk = container_of(sk, struct caifsock, sk); mutex_unlock(&cf_sk->readlock); } static int sk_rcvbuf_lowwater(struct caifsock *cf_sk) { /* A quarter of full buffer is used a low water mark */ return cf_sk->sk.sk_rcvbuf / 4; } static void caif_flow_ctrl(struct sock *sk, int mode) { struct caifsock *cf_sk; cf_sk = container_of(sk, struct caifsock, sk); if (cf_sk->layer.dn && cf_sk->layer.dn->modemcmd) cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, mode); } /* * Copied from sock.c:sock_queue_rcv_skb(), but changed so packets are * not dropped, but CAIF is sending flow off instead. 
*/ static void caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int err; unsigned long flags; struct sk_buff_head *list = &sk->sk_receive_queue; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); bool queued = false; if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned int)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) { net_dbg_ratelimited("sending flow OFF (queue len = %d %d)\n", atomic_read(&cf_sk->sk.sk_rmem_alloc), sk_rcvbuf_lowwater(cf_sk)); set_rx_flow_off(cf_sk); caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); } err = sk_filter(sk, skb); if (err) goto out; if (!sk_rmem_schedule(sk, skb, skb->truesize) && rx_flow_is_on(cf_sk)) { set_rx_flow_off(cf_sk); net_dbg_ratelimited("sending flow OFF due to rmem_schedule\n"); caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); } skb->dev = NULL; skb_set_owner_r(skb, sk); spin_lock_irqsave(&list->lock, flags); queued = !sock_flag(sk, SOCK_DEAD); if (queued) __skb_queue_tail(list, skb); spin_unlock_irqrestore(&list->lock, flags); out: if (queued) sk->sk_data_ready(sk); else kfree_skb(skb); } /* Packet Receive Callback function called from CAIF Stack */ static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt) { struct caifsock *cf_sk; struct sk_buff *skb; cf_sk = container_of(layr, struct caifsock, layer); skb = cfpkt_tonative(pkt); if (unlikely(cf_sk->sk.sk_state != CAIF_CONNECTED)) { kfree_skb(skb); return 0; } caif_queue_rcv_skb(&cf_sk->sk, skb); return 0; } static void cfsk_hold(struct cflayer *layr) { struct caifsock *cf_sk = container_of(layr, struct caifsock, layer); sock_hold(&cf_sk->sk); } static void cfsk_put(struct cflayer *layr) { struct caifsock *cf_sk = container_of(layr, struct caifsock, layer); sock_put(&cf_sk->sk); } /* Packet Control Callback function called from CAIF */ static void caif_ctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow, int phyid) { struct caifsock *cf_sk = container_of(layr, struct caifsock, layer); switch (flow) { case CAIF_CTRLCMD_FLOW_ON_IND: /* OK from modem to start sending again */ set_tx_flow_on(cf_sk); cf_sk->sk.sk_state_change(&cf_sk->sk); break; case CAIF_CTRLCMD_FLOW_OFF_IND: /* Modem asks us to shut up */ set_tx_flow_off(cf_sk); cf_sk->sk.sk_state_change(&cf_sk->sk); break; case CAIF_CTRLCMD_INIT_RSP: /* We're now connected */ caif_client_register_refcnt(&cf_sk->layer, cfsk_hold, cfsk_put); cf_sk->sk.sk_state = CAIF_CONNECTED; set_tx_flow_on(cf_sk); cf_sk->sk.sk_shutdown = 0; cf_sk->sk.sk_state_change(&cf_sk->sk); break; case CAIF_CTRLCMD_DEINIT_RSP: /* We're now disconnected */ cf_sk->sk.sk_state = CAIF_DISCONNECTED; cf_sk->sk.sk_state_change(&cf_sk->sk); break; case CAIF_CTRLCMD_INIT_FAIL_RSP: /* Connect request failed */ cf_sk->sk.sk_err = ECONNREFUSED; cf_sk->sk.sk_state = CAIF_DISCONNECTED; cf_sk->sk.sk_shutdown = SHUTDOWN_MASK; /* * Socket "standards" seems to require POLLOUT to * be set at connect failure. */ set_tx_flow_on(cf_sk); cf_sk->sk.sk_state_change(&cf_sk->sk); break; case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND: /* Modem has closed this connection, or device is down. 
*/ cf_sk->sk.sk_shutdown = SHUTDOWN_MASK; cf_sk->sk.sk_err = ECONNRESET; set_rx_flow_on(cf_sk); sk_error_report(&cf_sk->sk); break; default: pr_debug("Unexpected flow command %d\n", flow); } } static void caif_check_flow_release(struct sock *sk) { struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); if (rx_flow_is_on(cf_sk)) return; if (atomic_read(&sk->sk_rmem_alloc) <= sk_rcvbuf_lowwater(cf_sk)) { set_rx_flow_on(cf_sk); caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_ON_REQ); } } /* * Copied from unix_dgram_recvmsg, but removed credit checks, * changed locking, address handling and added MSG_TRUNC. */ static int caif_seqpkt_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; int ret; int copylen; ret = -EOPNOTSUPP; if (flags & MSG_OOB) goto read_error; skb = skb_recv_datagram(sk, flags, &ret); if (!skb) goto read_error; copylen = skb->len; if (len < copylen) { m->msg_flags |= MSG_TRUNC; copylen = len; } ret = skb_copy_datagram_msg(skb, 0, m, copylen); if (ret) goto out_free; ret = (flags & MSG_TRUNC) ? skb->len : copylen; out_free: skb_free_datagram(sk, skb); caif_check_flow_release(sk); return ret; read_error: return ret; } /* Copied from unix_stream_wait_data, identical except for lock call. */ static long caif_stream_data_wait(struct sock *sk, long timeo) { DEFINE_WAIT(wait); lock_sock(sk); for (;;) { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (!skb_queue_empty(&sk->sk_receive_queue) || sk->sk_err || sk->sk_state != CAIF_CONNECTED || sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current) || !timeo) break; sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); if (sock_flag(sk, SOCK_DEAD)) break; sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); } finish_wait(sk_sleep(sk), &wait); release_sock(sk); return timeo; } /* * Copied from unix_stream_recvmsg, but removed credit checks, * changed locking calls, changed address handling. */ static int caif_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; int copied = 0; int target; int err = 0; long timeo; err = -EOPNOTSUPP; if (flags&MSG_OOB) goto out; /* * Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg */ err = -EAGAIN; if (sk->sk_state == CAIF_CONNECTING) goto out; caif_read_lock(sk); target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); do { int chunk; struct sk_buff *skb; lock_sock(sk); if (sock_flag(sk, SOCK_DEAD)) { err = -ECONNRESET; goto unlock; } skb = skb_dequeue(&sk->sk_receive_queue); caif_check_flow_release(sk); if (skb == NULL) { if (copied >= target) goto unlock; /* * POSIX 1003.1g mandates this order. 
*/ err = sock_error(sk); if (err) goto unlock; err = -ECONNRESET; if (sk->sk_shutdown & RCV_SHUTDOWN) goto unlock; err = -EPIPE; if (sk->sk_state != CAIF_CONNECTED) goto unlock; if (sock_flag(sk, SOCK_DEAD)) goto unlock; release_sock(sk); err = -EAGAIN; if (!timeo) break; caif_read_unlock(sk); timeo = caif_stream_data_wait(sk, timeo); if (signal_pending(current)) { err = sock_intr_errno(timeo); goto out; } caif_read_lock(sk); continue; unlock: release_sock(sk); break; } release_sock(sk); chunk = min_t(unsigned int, skb->len, size); if (memcpy_to_msg(msg, skb->data, chunk)) { skb_queue_head(&sk->sk_receive_queue, skb); if (copied == 0) copied = -EFAULT; break; } copied += chunk; size -= chunk; /* Mark read part of skb as used */ if (!(flags & MSG_PEEK)) { skb_pull(skb, chunk); /* put the skb back if we didn't use it up. */ if (skb->len) { skb_queue_head(&sk->sk_receive_queue, skb); break; } kfree_skb(skb); } else { /* * It is questionable, see note in unix_dgram_recvmsg. */ /* put message back and return */ skb_queue_head(&sk->sk_receive_queue, skb); break; } } while (size); caif_read_unlock(sk); out: return copied ? : err; } /* * Copied from sock.c:sock_wait_for_wmem, but change to wait for * CAIF flow-on and sock_writable. */ static long caif_wait_for_flow_on(struct caifsock *cf_sk, int wait_writeable, long timeo, int *err) { struct sock *sk = &cf_sk->sk; DEFINE_WAIT(wait); for (;;) { *err = 0; if (tx_flow_is_on(cf_sk) && (!wait_writeable || sock_writeable(&cf_sk->sk))) break; *err = -ETIMEDOUT; if (!timeo) break; *err = -ERESTARTSYS; if (signal_pending(current)) break; prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); *err = -ECONNRESET; if (sk->sk_shutdown & SHUTDOWN_MASK) break; *err = -sk->sk_err; if (sk->sk_err) break; *err = -EPIPE; if (cf_sk->sk.sk_state != CAIF_CONNECTED) break; timeo = schedule_timeout(timeo); } finish_wait(sk_sleep(sk), &wait); return timeo; } /* * Transmit a SKB. The device may temporarily request re-transmission * by returning EAGAIN. */ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk, int noblock, long timeo) { struct cfpkt *pkt; pkt = cfpkt_fromnative(CAIF_DIR_OUT, skb); memset(skb->cb, 0, sizeof(struct caif_payload_info)); cfpkt_set_prio(pkt, cf_sk->sk.sk_priority); if (cf_sk->layer.dn == NULL) { kfree_skb(skb); return -EINVAL; } return cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt); } /* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */ static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); int buffer_size; int ret = 0; struct sk_buff *skb = NULL; int noblock; long timeo; caif_assert(cf_sk); ret = sock_error(sk); if (ret) goto err; ret = -EOPNOTSUPP; if (msg->msg_flags&MSG_OOB) goto err; ret = -EOPNOTSUPP; if (msg->msg_namelen) goto err; noblock = msg->msg_flags & MSG_DONTWAIT; timeo = sock_sndtimeo(sk, noblock); timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk), 1, timeo, &ret); if (ret) goto err; ret = -EPIPE; if (cf_sk->sk.sk_state != CAIF_CONNECTED || sock_flag(sk, SOCK_DEAD) || (sk->sk_shutdown & RCV_SHUTDOWN)) goto err; /* Error if trying to write more than maximum frame size. 
*/ ret = -EMSGSIZE; if (len > cf_sk->maxframe && cf_sk->sk.sk_protocol != CAIFPROTO_RFM) goto err; buffer_size = len + cf_sk->headroom + cf_sk->tailroom; ret = -ENOMEM; skb = sock_alloc_send_skb(sk, buffer_size, noblock, &ret); if (!skb || skb_tailroom(skb) < buffer_size) goto err; skb_reserve(skb, cf_sk->headroom); ret = memcpy_from_msg(skb_put(skb, len), msg, len); if (ret) goto err; ret = transmit_skb(skb, cf_sk, noblock, timeo); if (ret < 0) /* skb is already freed */ return ret; return len; err: kfree_skb(skb); return ret; } /* * Copied from unix_stream_sendmsg and adapted to CAIF: * Changed removed permission handling and added waiting for flow on * and other minor adaptations. */ static int caif_stream_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); int err, size; struct sk_buff *skb; int sent = 0; long timeo; err = -EOPNOTSUPP; if (unlikely(msg->msg_flags&MSG_OOB)) goto out_err; if (unlikely(msg->msg_namelen)) goto out_err; timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); timeo = caif_wait_for_flow_on(cf_sk, 1, timeo, &err); if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN)) goto pipe_err; while (sent < len) { size = len-sent; if (size > cf_sk->maxframe) size = cf_sk->maxframe; /* If size is more than half of sndbuf, chop up message */ if (size > ((sk->sk_sndbuf >> 1) - 64)) size = (sk->sk_sndbuf >> 1) - 64; if (size > SKB_MAX_ALLOC) size = SKB_MAX_ALLOC; skb = sock_alloc_send_skb(sk, size + cf_sk->headroom + cf_sk->tailroom, msg->msg_flags&MSG_DONTWAIT, &err); if (skb == NULL) goto out_err; skb_reserve(skb, cf_sk->headroom); /* * If you pass two values to the sock_alloc_send_skb * it tries to grab the large buffer with GFP_NOFS * (which can fail easily), and if it fails grab the * fallback size buffer which is under a page and will * succeed. [Alan] */ size = min_t(int, size, skb_tailroom(skb)); err = memcpy_from_msg(skb_put(skb, size), msg, size); if (err) { kfree_skb(skb); goto out_err; } err = transmit_skb(skb, cf_sk, msg->msg_flags&MSG_DONTWAIT, timeo); if (err < 0) /* skb is already freed */ goto pipe_err; sent += size; } return sent; pipe_err: if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL)) send_sig(SIGPIPE, current, 0); err = -EPIPE; out_err: return sent ? : err; } static int setsockopt(struct socket *sock, int lvl, int opt, sockptr_t ov, unsigned int ol) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); int linksel; if (cf_sk->sk.sk_socket->state != SS_UNCONNECTED) return -ENOPROTOOPT; switch (opt) { case CAIFSO_LINK_SELECT: if (ol < sizeof(int)) return -EINVAL; if (lvl != SOL_CAIF) goto bad_sol; if (copy_from_sockptr(&linksel, ov, sizeof(int))) return -EINVAL; lock_sock(&(cf_sk->sk)); cf_sk->conn_req.link_selector = linksel; release_sock(&cf_sk->sk); return 0; case CAIFSO_REQ_PARAM: if (lvl != SOL_CAIF) goto bad_sol; if (cf_sk->sk.sk_protocol != CAIFPROTO_UTIL) return -ENOPROTOOPT; lock_sock(&(cf_sk->sk)); if (ol > sizeof(cf_sk->conn_req.param.data) || copy_from_sockptr(&cf_sk->conn_req.param.data, ov, ol)) { release_sock(&cf_sk->sk); return -EINVAL; } cf_sk->conn_req.param.size = ol; release_sock(&cf_sk->sk); return 0; default: return -ENOPROTOOPT; } return 0; bad_sol: return -ENOPROTOOPT; } /* * caif_connect() - Connect a CAIF Socket * Copied and modified af_irda.c:irda_connect(). * * Note : by consulting "errno", the user space caller may learn the cause * of the failure. 
Most of them are visible in the function, others may come * from subroutines called and are listed here : * o -EAFNOSUPPORT: bad socket family or type. * o -ESOCKTNOSUPPORT: bad socket type or protocol * o -EINVAL: bad socket address, or CAIF link type * o -ECONNREFUSED: remote end refused the connection. * o -EINPROGRESS: connect request sent but timed out (or non-blocking) * o -EISCONN: already connected. * o -ETIMEDOUT: Connection timed out (send timeout) * o -ENODEV: No link layer to send request * o -ECONNRESET: Received Shutdown indication or lost link layer * o -ENOMEM: Out of memory * * State Strategy: * o sk_state: holds the CAIF_* protocol state, it's updated by * caif_ctrl_cb. * o sock->state: holds the SS_* socket state and is updated by connect and * disconnect. */ static int caif_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); long timeo; int err; int ifindex, headroom, tailroom; unsigned int mtu; struct net_device *dev; lock_sock(sk); err = -EINVAL; if (addr_len < offsetofend(struct sockaddr, sa_family)) goto out; err = -EAFNOSUPPORT; if (uaddr->sa_family != AF_CAIF) goto out; switch (sock->state) { case SS_UNCONNECTED: /* Normal case, a fresh connect */ caif_assert(sk->sk_state == CAIF_DISCONNECTED); break; case SS_CONNECTING: switch (sk->sk_state) { case CAIF_CONNECTED: sock->state = SS_CONNECTED; err = -EISCONN; goto out; case CAIF_DISCONNECTED: /* Reconnect allowed */ break; case CAIF_CONNECTING: err = -EALREADY; if (flags & O_NONBLOCK) goto out; goto wait_connect; } break; case SS_CONNECTED: caif_assert(sk->sk_state == CAIF_CONNECTED || sk->sk_state == CAIF_DISCONNECTED); if (sk->sk_shutdown & SHUTDOWN_MASK) { /* Allow re-connect after SHUTDOWN_IND */ caif_disconnect_client(sock_net(sk), &cf_sk->layer); caif_free_client(&cf_sk->layer); break; } /* No reconnect on a seqpacket socket */ err = -EISCONN; goto out; case SS_DISCONNECTING: case SS_FREE: caif_assert(1); /*Should never happen */ break; } sk->sk_state = CAIF_DISCONNECTED; sock->state = SS_UNCONNECTED; sk_stream_kill_queues(&cf_sk->sk); err = -EINVAL; if (addr_len != sizeof(struct sockaddr_caif)) goto out; memcpy(&cf_sk->conn_req.sockaddr, uaddr, sizeof(struct sockaddr_caif)); /* Move to connecting socket, start sending Connect Requests */ sock->state = SS_CONNECTING; sk->sk_state = CAIF_CONNECTING; /* Check priority value comming from socket */ /* if priority value is out of range it will be ajusted */ if (cf_sk->sk.sk_priority > CAIF_PRIO_MAX) cf_sk->conn_req.priority = CAIF_PRIO_MAX; else if (cf_sk->sk.sk_priority < CAIF_PRIO_MIN) cf_sk->conn_req.priority = CAIF_PRIO_MIN; else cf_sk->conn_req.priority = cf_sk->sk.sk_priority; /*ifindex = id of the interface.*/ cf_sk->conn_req.ifindex = cf_sk->sk.sk_bound_dev_if; cf_sk->layer.receive = caif_sktrecv_cb; err = caif_connect_client(sock_net(sk), &cf_sk->conn_req, &cf_sk->layer, &ifindex, &headroom, &tailroom); if (err < 0) { cf_sk->sk.sk_socket->state = SS_UNCONNECTED; cf_sk->sk.sk_state = CAIF_DISCONNECTED; goto out; } err = -ENODEV; rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), ifindex); if (!dev) { rcu_read_unlock(); goto out; } cf_sk->headroom = LL_RESERVED_SPACE_EXTRA(dev, headroom); mtu = dev->mtu; rcu_read_unlock(); cf_sk->tailroom = tailroom; cf_sk->maxframe = mtu - (headroom + tailroom); if (cf_sk->maxframe < 1) { pr_warn("CAIF Interface MTU too small (%d)\n", dev->mtu); err = -ENODEV; goto out; } err = 
-EINPROGRESS; wait_connect: if (sk->sk_state != CAIF_CONNECTED && (flags & O_NONBLOCK)) goto out; timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); release_sock(sk); err = -ERESTARTSYS; timeo = wait_event_interruptible_timeout(*sk_sleep(sk), sk->sk_state != CAIF_CONNECTING, timeo); lock_sock(sk); if (timeo < 0) goto out; /* -ERESTARTSYS */ err = -ETIMEDOUT; if (timeo == 0 && sk->sk_state != CAIF_CONNECTED) goto out; if (sk->sk_state != CAIF_CONNECTED) { sock->state = SS_UNCONNECTED; err = sock_error(sk); if (!err) err = -ECONNREFUSED; goto out; } sock->state = SS_CONNECTED; err = 0; out: release_sock(sk); return err; } /* * caif_release() - Disconnect a CAIF Socket * Copied and modified af_irda.c:irda_release(). */ static int caif_release(struct socket *sock) { struct sock *sk = sock->sk; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); if (!sk) return 0; set_tx_flow_off(cf_sk); /* * Ensure that packets are not queued after this point in time. * caif_queue_rcv_skb checks SOCK_DEAD holding the queue lock, * this ensures no packets when sock is dead. */ spin_lock_bh(&sk->sk_receive_queue.lock); sock_set_flag(sk, SOCK_DEAD); spin_unlock_bh(&sk->sk_receive_queue.lock); sock->sk = NULL; WARN_ON(IS_ERR(cf_sk->debugfs_socket_dir)); debugfs_remove_recursive(cf_sk->debugfs_socket_dir); lock_sock(&(cf_sk->sk)); sk->sk_state = CAIF_DISCONNECTED; sk->sk_shutdown = SHUTDOWN_MASK; caif_disconnect_client(sock_net(sk), &cf_sk->layer); cf_sk->sk.sk_socket->state = SS_DISCONNECTING; wake_up_interruptible_poll(sk_sleep(sk), EPOLLERR|EPOLLHUP); sock_orphan(sk); sk_stream_kill_queues(&cf_sk->sk); release_sock(sk); sock_put(sk); return 0; } /* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */ static __poll_t caif_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); sock_poll_wait(file, sock, wait); mask = 0; /* exceptional events? */ if (sk->sk_err) mask |= EPOLLERR; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLRDHUP; /* readable? */ if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || (sk->sk_shutdown & RCV_SHUTDOWN)) mask |= EPOLLIN | EPOLLRDNORM; /* * we set writable also when the other side has shut down the * connection. This prevents stuck sockets. */ if (sock_writeable(sk) && tx_flow_is_on(cf_sk)) mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; return mask; } static const struct proto_ops caif_seqpacket_ops = { .family = PF_CAIF, .owner = THIS_MODULE, .release = caif_release, .bind = sock_no_bind, .connect = caif_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, .poll = caif_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = setsockopt, .sendmsg = caif_seqpkt_sendmsg, .recvmsg = caif_seqpkt_recvmsg, .mmap = sock_no_mmap, }; static const struct proto_ops caif_stream_ops = { .family = PF_CAIF, .owner = THIS_MODULE, .release = caif_release, .bind = sock_no_bind, .connect = caif_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, .poll = caif_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = setsockopt, .sendmsg = caif_stream_sendmsg, .recvmsg = caif_stream_recvmsg, .mmap = sock_no_mmap, }; /* This function is called when a socket is finally destroyed. 
*/ static void caif_sock_destructor(struct sock *sk) { struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); caif_assert(!refcount_read(&sk->sk_wmem_alloc)); caif_assert(sk_unhashed(sk)); caif_assert(!sk->sk_socket); if (!sock_flag(sk, SOCK_DEAD)) { pr_debug("Attempt to release alive CAIF socket: %p\n", sk); return; } sk_stream_kill_queues(&cf_sk->sk); WARN_ON_ONCE(sk->sk_forward_alloc); caif_free_client(&cf_sk->layer); } static int caif_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk = NULL; struct caifsock *cf_sk = NULL; static struct proto prot = {.name = "PF_CAIF", .owner = THIS_MODULE, .obj_size = sizeof(struct caifsock), .useroffset = offsetof(struct caifsock, conn_req.param), .usersize = sizeof_field(struct caifsock, conn_req.param) }; if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN)) return -EPERM; /* * The sock->type specifies the socket type to use. * The CAIF socket is a packet stream in the sense * that it is packet based. CAIF trusts the reliability * of the link, no resending is implemented. */ if (sock->type == SOCK_SEQPACKET) sock->ops = &caif_seqpacket_ops; else if (sock->type == SOCK_STREAM) sock->ops = &caif_stream_ops; else return -ESOCKTNOSUPPORT; if (protocol < 0 || protocol >= CAIFPROTO_MAX) return -EPROTONOSUPPORT; /* * Set the socket state to unconnected. The socket state * is really not used at all in the net/core or socket.c but the * initialization makes sure that sock->state is not uninitialized. */ sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot, kern); if (!sk) return -ENOMEM; cf_sk = container_of(sk, struct caifsock, sk); /* Store the protocol */ sk->sk_protocol = (unsigned char) protocol; /* Initialize default priority for well-known cases */ switch (protocol) { case CAIFPROTO_AT: sk->sk_priority = TC_PRIO_CONTROL; break; case CAIFPROTO_RFM: sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; break; default: sk->sk_priority = TC_PRIO_BESTEFFORT; } /* * Lock in order to try to stop someone from opening the socket * too early. */ lock_sock(&(cf_sk->sk)); /* Initialize the nozero default sock structure data. */ sock_init_data(sock, sk); sk->sk_destruct = caif_sock_destructor; mutex_init(&cf_sk->readlock); /* single task reading lock */ cf_sk->layer.ctrlcmd = caif_ctrl_cb; cf_sk->sk.sk_socket->state = SS_UNCONNECTED; cf_sk->sk.sk_state = CAIF_DISCONNECTED; set_tx_flow_off(cf_sk); set_rx_flow_on(cf_sk); /* Set default options on configuration */ cf_sk->conn_req.link_selector = CAIF_LINK_LOW_LATENCY; cf_sk->conn_req.protocol = protocol; release_sock(&cf_sk->sk); return 0; } static const struct net_proto_family caif_family_ops = { .family = PF_CAIF, .create = caif_create, .owner = THIS_MODULE, }; static int __init caif_sktinit_module(void) { return sock_register(&caif_family_ops); } static void __exit caif_sktexit_module(void) { sock_unregister(PF_CAIF); } module_init(caif_sktinit_module); module_exit(caif_sktexit_module);
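A hedged user-space sketch of the contract documented above for caif_connect() and caif_poll(): connect an AF_CAIF sequenced-packet socket, then wait for POLLOUT (which caif_poll() only reports while CAIF flow control is on) before sending. The sockaddr_caif layout and the CAIFPROTO_AT choice follow the uapi header <linux/caif/caif_socket.h> and are assumptions of this example, not taken from the code above.

#include <sys/socket.h>
#include <poll.h>
#include <string.h>
#include <unistd.h>
#include <linux/caif/caif_socket.h>

/* Illustrative only: error handling reduced to a single failure path. */
static int caif_send_example(const void *buf, size_t len)
{
	struct sockaddr_caif addr;
	struct pollfd pfd;
	int fd = socket(AF_CAIF, SOCK_SEQPACKET, CAIFPROTO_AT);

	if (fd < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.family = AF_CAIF;	/* field name per the uapi header (assumed) */

	/* on failure, errno follows the list in the caif_connect() comment */
	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		goto fail;

	pfd.fd = fd;
	pfd.events = POLLOUT;	/* writable only while tx flow is on */
	if (poll(&pfd, 1, 5000) <= 0)
		goto fail;

	if (send(fd, buf, len, 0) < 0)	/* len must fit one CAIF frame */
		goto fail;

	close(fd);
	return 0;
fail:
	close(fd);
	return -1;
}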
29 28 28 28 28 30 29 29 30 29 30 30 30 29 7 27 29 29 30 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2010 Red Hat, Inc. * Copyright (c) 2016-2021 Christoph Hellwig. */ #include <linux/iomap.h> #include "trace.h" static inline void iomap_iter_reset_iomap(struct iomap_iter *iter) { if (iter->fbatch) { folio_batch_release(iter->fbatch); kfree(iter->fbatch); iter->fbatch = NULL; } iter->status = 0; memset(&iter->iomap, 0, sizeof(iter->iomap)); memset(&iter->srcmap, 0, sizeof(iter->srcmap)); } /* Advance the current iterator position and decrement the remaining length */ int iomap_iter_advance(struct iomap_iter *iter, u64 count) { if (WARN_ON_ONCE(count > iomap_length(iter))) return -EIO; iter->pos += count; iter->len -= count; return 0; } static inline void iomap_iter_done(struct iomap_iter *iter) { WARN_ON_ONCE(iter->iomap.offset > iter->pos); WARN_ON_ONCE(iter->iomap.length == 0); WARN_ON_ONCE(iter->iomap.offset + iter->iomap.length <= iter->pos); WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_STALE); iter->iter_start_pos = iter->pos; trace_iomap_iter_dstmap(iter->inode, &iter->iomap); if (iter->srcmap.type != IOMAP_HOLE) trace_iomap_iter_srcmap(iter->inode, &iter->srcmap); } /** * iomap_iter - iterate over a ranges in a file * @iter: iteration structue * @ops: iomap ops provided by the file system * * Iterate over filesystem-provided space mappings for the provided file range. * * This function handles cleanup of resources acquired for iteration when the * filesystem indicates there are no more space mappings, which means that this * function must be called in a loop that continues as long it returns a * positive value. If 0 or a negative value is returned, the caller must not * return to the loop body. Within a loop body, there are two ways to break out * of the loop body: leave @iter.status unchanged, or set it to a negative * errno. */ int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops) { bool stale = iter->iomap.flags & IOMAP_F_STALE; ssize_t advanced; u64 olen; int ret; trace_iomap_iter(iter, ops, _RET_IP_); if (!iter->iomap.length) goto begin; /* * Calculate how far the iter was advanced and the original length bytes * for ->iomap_end(). */ advanced = iter->pos - iter->iter_start_pos; olen = iter->len + advanced; if (ops->iomap_end) { ret = ops->iomap_end(iter->inode, iter->iter_start_pos, iomap_length_trim(iter, iter->iter_start_pos, olen), advanced, iter->flags, &iter->iomap); if (ret < 0 && !advanced) return ret; } /* detect old return semantics where this would advance */ if (WARN_ON_ONCE(iter->status > 0)) iter->status = -EIO; /* * Use iter->len to determine whether to continue onto the next mapping. * Explicitly terminate on error status or if the current iter has not * advanced at all (i.e. no work was done for some reason) unless the * mapping has been marked stale and needs to be reprocessed. 
*/ if (iter->status < 0) ret = iter->status; else if (iter->len == 0 || (!advanced && !stale)) ret = 0; else ret = 1; iomap_iter_reset_iomap(iter); if (ret <= 0) return ret; begin: ret = ops->iomap_begin(iter->inode, iter->pos, iter->len, iter->flags, &iter->iomap, &iter->srcmap); if (ret < 0) return ret; iomap_iter_done(iter); return 1; }
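The kernel-doc above spells out the calling convention for iomap_iter(): keep looping while it returns a positive value, and communicate per-mapping results through iter.status. A minimal hypothetical caller, assuming the IOMAP_REPORT flag and leaving the real per-extent work as a comment:

/* Sketch only: not part of iomap; the work on each extent is elided. */
static int example_walk_range(struct inode *inode, loff_t pos, loff_t len,
			      const struct iomap_ops *ops)
{
	struct iomap_iter iter = {
		.inode	= inode,
		.pos	= pos,
		.len	= len,
		.flags	= IOMAP_REPORT,
	};
	int ret;

	while ((ret = iomap_iter(&iter, ops)) > 0) {
		u64 length = iomap_length(&iter);

		/* ... inspect iter.iomap for this extent here ... */

		/* keep status at 0 (or a -errno) and consume the mapping */
		iter.status = iomap_iter_advance(&iter, length);
	}
	return ret;
}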
6 6 6 6 6 10 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 // SPDX-License-Identifier: GPL-2.0-or-later /* Instantiate a public key crypto key from an X.509 Certificate * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) "X.509: "fmt #include <crypto/hash.h> #include <keys/asymmetric-parser.h> #include <keys/asymmetric-subtype.h> #include <keys/system_keyring.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/string.h> #include "asymmetric_keys.h" #include "x509_parser.h" /* * Set up the signature parameters in an X.509 certificate. This involves * digesting the signed data and extracting the signature. */ int x509_get_sig_params(struct x509_certificate *cert) { struct public_key_signature *sig = cert->sig; struct crypto_shash *tfm; struct shash_desc *desc; size_t desc_size; int ret; pr_devel("==>%s()\n", __func__); sig->s = kmemdup(cert->raw_sig, cert->raw_sig_size, GFP_KERNEL); if (!sig->s) return -ENOMEM; sig->s_size = cert->raw_sig_size; /* Allocate the hashing algorithm we're going to need and find out how * big the hash operational data will be. */ tfm = crypto_alloc_shash(sig->hash_algo, 0, 0); if (IS_ERR(tfm)) { if (PTR_ERR(tfm) == -ENOENT) { cert->unsupported_sig = true; return 0; } return PTR_ERR(tfm); } desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); sig->digest_size = crypto_shash_digestsize(tfm); ret = -ENOMEM; sig->digest = kmalloc(sig->digest_size, GFP_KERNEL); if (!sig->digest) goto error; desc = kzalloc(desc_size, GFP_KERNEL); if (!desc) goto error; desc->tfm = tfm; ret = crypto_shash_digest(desc, cert->tbs, cert->tbs_size, sig->digest); if (ret < 0) goto error_2; ret = is_hash_blacklisted(sig->digest, sig->digest_size, BLACKLIST_HASH_X509_TBS); if (ret == -EKEYREJECTED) { pr_err("Cert %*phN is blacklisted\n", sig->digest_size, sig->digest); cert->blacklisted = true; ret = 0; } error_2: kfree(desc); error: crypto_free_shash(tfm); pr_devel("<==%s() = %d\n", __func__, ret); return ret; } /* * Check for self-signedness in an X.509 cert and if found, check the signature * immediately if we can. */ int x509_check_for_self_signed(struct x509_certificate *cert) { int ret = 0; pr_devel("==>%s()\n", __func__); if (cert->raw_subject_size != cert->raw_issuer_size || memcmp(cert->raw_subject, cert->raw_issuer, cert->raw_issuer_size) != 0) goto not_self_signed; if (cert->sig->auth_ids[0] || cert->sig->auth_ids[1]) { /* If the AKID is present it may have one or two parts. If * both are supplied, both must match. 
*/ bool a = asymmetric_key_id_same(cert->skid, cert->sig->auth_ids[1]); bool b = asymmetric_key_id_same(cert->id, cert->sig->auth_ids[0]); if (!a && !b) goto not_self_signed; ret = -EKEYREJECTED; if (((a && !b) || (b && !a)) && cert->sig->auth_ids[0] && cert->sig->auth_ids[1]) goto out; } if (cert->unsupported_sig) { ret = 0; goto out; } ret = public_key_verify_signature(cert->pub, cert->sig); if (ret < 0) { if (ret == -ENOPKG) { cert->unsupported_sig = true; ret = 0; } goto out; } pr_devel("Cert Self-signature verified"); cert->self_signed = true; out: pr_devel("<==%s() = %d\n", __func__, ret); return ret; not_self_signed: pr_devel("<==%s() = 0 [not]\n", __func__); return 0; } /* * Attempt to parse a data blob for a key as an X509 certificate. */ static int x509_key_preparse(struct key_preparsed_payload *prep) { struct x509_certificate *cert __free(x509_free_certificate); struct asymmetric_key_ids *kids __free(kfree) = NULL; char *p, *desc __free(kfree) = NULL; const char *q; size_t srlen, sulen; cert = x509_cert_parse(prep->data, prep->datalen); if (IS_ERR(cert)) return PTR_ERR(cert); pr_devel("Cert Issuer: %s\n", cert->issuer); pr_devel("Cert Subject: %s\n", cert->subject); pr_devel("Cert Key Algo: %s\n", cert->pub->pkey_algo); pr_devel("Cert Valid period: %lld-%lld\n", cert->valid_from, cert->valid_to); cert->pub->id_type = "X509"; if (cert->unsupported_sig) { public_key_signature_free(cert->sig); cert->sig = NULL; } else { pr_devel("Cert Signature: %s + %s\n", cert->sig->pkey_algo, cert->sig->hash_algo); } /* Don't permit addition of blacklisted keys */ if (cert->blacklisted) return -EKEYREJECTED; /* Propose a description */ sulen = strlen(cert->subject); if (cert->raw_skid) { srlen = cert->raw_skid_size; q = cert->raw_skid; } else { srlen = cert->raw_serial_size; q = cert->raw_serial; } desc = kmalloc(sulen + 2 + srlen * 2 + 1, GFP_KERNEL); if (!desc) return -ENOMEM; p = memcpy(desc, cert->subject, sulen); p += sulen; *p++ = ':'; *p++ = ' '; p = bin2hex(p, q, srlen); *p = 0; kids = kmalloc(sizeof(struct asymmetric_key_ids), GFP_KERNEL); if (!kids) return -ENOMEM; kids->id[0] = cert->id; kids->id[1] = cert->skid; kids->id[2] = asymmetric_key_generate_id(cert->raw_subject, cert->raw_subject_size, "", 0); if (IS_ERR(kids->id[2])) return PTR_ERR(kids->id[2]); /* We're pinning the module by being linked against it */ __module_get(public_key_subtype.owner); prep->payload.data[asym_subtype] = &public_key_subtype; prep->payload.data[asym_key_ids] = kids; prep->payload.data[asym_crypto] = cert->pub; prep->payload.data[asym_auth] = cert->sig; prep->description = desc; prep->quotalen = 100; /* We've finished with the certificate */ cert->pub = NULL; cert->id = NULL; cert->skid = NULL; cert->sig = NULL; desc = NULL; kids = NULL; return 0; } static struct asymmetric_key_parser x509_key_parser = { .owner = THIS_MODULE, .name = "x509", .parse = x509_key_preparse, }; /* * Module stuff */ static int __init x509_key_init(void) { return register_asymmetric_key_parser(&x509_key_parser); } static void __exit x509_key_exit(void) { unregister_asymmetric_key_parser(&x509_key_parser); } module_init(x509_key_init); module_exit(x509_key_exit); MODULE_DESCRIPTION("X.509 certificate parser"); MODULE_AUTHOR("Red Hat, Inc."); MODULE_LICENSE("GPL");
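x509_key_preparse() above proposes the key description as the certificate subject, a colon and space, then the subject key identifier (or, failing that, the serial number) in hex. A stand-alone illustration of that format; build_desc() is hypothetical and not kernel code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *build_desc(const char *subject,
			const unsigned char *id, size_t id_len)
{
	/* subject + ": " + two hex digits per id byte + NUL */
	char *desc = malloc(strlen(subject) + 2 + id_len * 2 + 1);
	char *p;
	size_t i;

	if (!desc)
		return NULL;
	p = desc + sprintf(desc, "%s: ", subject);
	for (i = 0; i < id_len; i++)
		p += sprintf(p, "%02x", id[i]);
	return desc;	/* e.g. "Some CA: 0123abcd" */
}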
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 /* SPDX-License-Identifier: GPL-2.0-only */ /* * AppArmor security module * * This file contains AppArmor auditing function definitions. * * Copyright (C) 1998-2008 Novell/SUSE * Copyright 2009-2010 Canonical Ltd. */ #ifndef __AA_AUDIT_H #define __AA_AUDIT_H #include <linux/audit.h> #include <linux/fs.h> #include <linux/lsm_audit.h> #include <linux/sched.h> #include <linux/slab.h> #include "file.h" #include "label.h" extern const char *const audit_mode_names[]; #define AUDIT_MAX_INDEX 5 enum audit_mode { AUDIT_NORMAL, /* follow normal auditing of accesses */ AUDIT_QUIET_DENIED, /* quiet all denied access messages */ AUDIT_QUIET, /* quiet all messages */ AUDIT_NOQUIET, /* do not quiet audit messages */ AUDIT_ALL /* audit all accesses */ }; enum audit_type { AUDIT_APPARMOR_AUDIT, AUDIT_APPARMOR_ALLOWED, AUDIT_APPARMOR_DENIED, AUDIT_APPARMOR_HINT, AUDIT_APPARMOR_STATUS, AUDIT_APPARMOR_ERROR, AUDIT_APPARMOR_KILL, AUDIT_APPARMOR_AUTO }; #define OP_NULL NULL #define OP_SYSCTL "sysctl" #define OP_CAPABLE "capable" #define OP_UNLINK "unlink" #define OP_MKDIR "mkdir" #define OP_RMDIR "rmdir" #define OP_MKNOD "mknod" #define OP_TRUNC "truncate" #define OP_LINK "link" #define OP_SYMLINK "symlink" #define OP_RENAME_SRC "rename_src" #define OP_RENAME_DEST "rename_dest" #define OP_CHMOD "chmod" #define OP_CHOWN "chown" #define OP_GETATTR "getattr" #define OP_OPEN "open" #define OP_FRECEIVE "file_receive" #define OP_FPERM "file_perm" #define OP_FLOCK "file_lock" #define OP_FMMAP "file_mmap" #define OP_FMPROT "file_mprotect" #define OP_INHERIT "file_inherit" #define OP_PIVOTROOT "pivotroot" #define OP_MOUNT "mount" #define OP_UMOUNT "umount" #define OP_CREATE "create" #define OP_POST_CREATE "post_create" #define OP_BIND "bind" #define OP_CONNECT "connect" #define OP_LISTEN "listen" #define OP_ACCEPT "accept" #define OP_SENDMSG "sendmsg" #define OP_RECVMSG "recvmsg" #define OP_GETSOCKNAME "getsockname" #define OP_GETPEERNAME "getpeername" #define OP_GETSOCKOPT "getsockopt" #define OP_SETSOCKOPT "setsockopt" #define OP_SHUTDOWN "socket_shutdown" #define OP_PTRACE "ptrace" #define OP_SIGNAL "signal" #define OP_EXEC "exec" #define OP_CHANGE_HAT "change_hat" #define OP_CHANGE_PROFILE "change_profile" #define OP_CHANGE_ONEXEC "change_onexec" #define OP_STACK "stack" #define OP_STACK_ONEXEC "stack_onexec" #define OP_SETPROCATTR "setprocattr" #define OP_SETRLIMIT "setrlimit" #define OP_PROF_REPL "profile_replace" #define OP_PROF_LOAD "profile_load" #define OP_PROF_RM "profile_remove" #define OP_USERNS_CREATE "userns_create" #define OP_URING_OVERRIDE "uring_override" #define OP_URING_SQPOLL "uring_sqpoll" struct apparmor_audit_data { int error; int type; u16 class; const char *op; const struct cred *subj_cred; struct aa_label *subj_label; const char *name; const char *info; u32 request; u32 
denied; union { /* these entries require a custom callback fn */ struct { struct aa_label *peer; union { struct { const char *target; kuid_t ouid; } fs; struct { int rlim; unsigned long max; } rlim; struct { int signal; int unmappedsig; }; struct { int type, protocol; void *addr; int addrlen; struct { void *addr; int addrlen; } peer; } net; }; }; struct { struct aa_profile *profile; const char *ns; long pos; } iface; struct { const char *src_name; const char *type; const char *trans; const char *data; unsigned long flags; } mnt; struct { struct aa_label *target; } uring; }; struct common_audit_data common; }; /* macros for dealing with apparmor_audit_data structure */ #define aad(SA) (container_of(SA, struct apparmor_audit_data, common)) #define aad_of_va(VA) aad((struct common_audit_data *)(VA)) #define DEFINE_AUDIT_DATA(NAME, T, C, X) \ /* TODO: cleanup audit init so we don't need _aad = {0,} */ \ struct apparmor_audit_data NAME = { \ .class = (C), \ .op = (X), \ .common.type = (T), \ .common.u.tsk = NULL, \ .common.apparmor_audit_data = &NAME, \ }; void aa_audit_msg(int type, struct apparmor_audit_data *ad, void (*cb) (struct audit_buffer *, void *)); int aa_audit(int type, struct aa_profile *profile, struct apparmor_audit_data *ad, void (*cb) (struct audit_buffer *, void *)); #define aa_audit_error(ERROR, AD, CB) \ ({ \ (AD)->error = (ERROR); \ aa_audit_msg(AUDIT_APPARMOR_ERROR, (AD), (CB)); \ (AD)->error; \ }) static inline int complain_error(int error) { if (error == -EPERM || error == -EACCES) return 0; return error; } void aa_audit_rule_free(void *vrule); int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule, gfp_t gfp); int aa_audit_rule_known(struct audit_krule *rule); int aa_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op, void *vrule); #endif /* __AA_AUDIT_H */
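A hypothetical caller sketch for the declarations above: DEFINE_AUDIT_DATA() fills in the class, operation and common LSM audit type, the caller sets the request and error, and aa_audit() with AUDIT_APPARMOR_AUTO picks audit/allowed/denied from the error. AA_CLASS_FILE, MAY_READ and file_audit_cb() are stand-ins borrowed from elsewhere in AppArmor, not defined in this header.

static int example_audit_open(struct aa_profile *profile,
			      const struct cred *cred,
			      struct aa_label *label, int error)
{
	DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_FILE, OP_OPEN);

	ad.subj_cred = cred;	/* subject credentials for the record */
	ad.subj_label = label;
	ad.request = MAY_READ;	/* permissions that were requested */
	ad.error = error;	/* 0, or the -errno being reported */

	return aa_audit(AUDIT_APPARMOR_AUTO, profile, &ad, file_audit_cb);
}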
2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 // SPDX-License-Identifier: GPL-2.0-only /* * Line 6 Linux USB driver * * Copyright (C) 2004-2010 Markus Grabner (line6@grabner-graz.at) */ #include <linux/slab.h> #include <sound/core.h> #include <sound/pcm.h> #include <sound/pcm_params.h> #include "capture.h" #include "driver.h" #include "pcm.h" #include "playback.h" /* Software stereo volume control. */ static void change_volume(struct urb *urb_out, int volume[], int bytes_per_frame) { int chn = 0; if (volume[0] == 256 && volume[1] == 256) return; /* maximum volume - no change */ if (bytes_per_frame == 4) { __le16 *p, *buf_end; p = (__le16 *)urb_out->transfer_buffer; buf_end = p + urb_out->transfer_buffer_length / sizeof(*p); for (; p < buf_end; ++p) { short pv = le16_to_cpu(*p); int val = (pv * volume[chn & 1]) >> 8; pv = clamp(val, -0x8000, 0x7fff); *p = cpu_to_le16(pv); ++chn; } } else if (bytes_per_frame == 6) { unsigned char *p, *buf_end; p = (unsigned char *)urb_out->transfer_buffer; buf_end = p + urb_out->transfer_buffer_length; for (; p < buf_end; p += 3) { int val; val = p[0] + (p[1] << 8) + ((signed char)p[2] << 16); val = (val * volume[chn & 1]) >> 8; val = clamp(val, -0x800000, 0x7fffff); p[0] = val; p[1] = val >> 8; p[2] = val >> 16; ++chn; } } } /* Create signal for impulse response test. 
*/ static void create_impulse_test_signal(struct snd_line6_pcm *line6pcm, struct urb *urb_out, int bytes_per_frame) { int frames = urb_out->transfer_buffer_length / bytes_per_frame; if (bytes_per_frame == 4) { int i; short *pi = (short *)line6pcm->prev_fbuf; short *po = (short *)urb_out->transfer_buffer; for (i = 0; i < frames; ++i) { po[0] = pi[0]; po[1] = 0; pi += 2; po += 2; } } else if (bytes_per_frame == 6) { int i, j; unsigned char *pi = line6pcm->prev_fbuf; unsigned char *po = urb_out->transfer_buffer; for (i = 0; i < frames; ++i) { for (j = 0; j < bytes_per_frame / 2; ++j) po[j] = pi[j]; for (; j < bytes_per_frame; ++j) po[j] = 0; pi += bytes_per_frame; po += bytes_per_frame; } } if (--line6pcm->impulse_count <= 0) { ((unsigned char *)(urb_out->transfer_buffer))[bytes_per_frame - 1] = line6pcm->impulse_volume; line6pcm->impulse_count = line6pcm->impulse_period; } } /* Add signal to buffer for software monitoring. */ static void add_monitor_signal(struct urb *urb_out, unsigned char *signal, int volume, int bytes_per_frame) { if (volume == 0) return; /* zero volume - no change */ if (bytes_per_frame == 4) { __le16 *pi, *po, *buf_end; pi = (__le16 *)signal; po = (__le16 *)urb_out->transfer_buffer; buf_end = po + urb_out->transfer_buffer_length / sizeof(*po); for (; po < buf_end; ++pi, ++po) { short pov = le16_to_cpu(*po); short piv = le16_to_cpu(*pi); int val = pov + ((piv * volume) >> 8); pov = clamp(val, -0x8000, 0x7fff); *po = cpu_to_le16(pov); } } /* We don't need to handle devices with 6 bytes per frame here since they all support hardware monitoring. */ } /* Find a free URB, prepare audio data, and submit URB. must be called in line6pcm->out.lock context */ static int submit_audio_out_urb(struct snd_line6_pcm *line6pcm) { int index; int i, urb_size, urb_frames; int ret; const int bytes_per_frame = line6pcm->properties->bytes_per_channel * line6pcm->properties->playback_hw.channels_max; const int frame_increment = line6pcm->properties->rates.rats[0].num_min; const int frame_factor = line6pcm->properties->rates.rats[0].den * (line6pcm->line6->intervals_per_second / LINE6_ISO_INTERVAL); struct urb *urb_out; index = find_first_zero_bit(&line6pcm->out.active_urbs, line6pcm->line6->iso_buffers); if (index < 0 || index >= line6pcm->line6->iso_buffers) { dev_err(line6pcm->line6->ifcdev, "no free URB found\n"); return -EINVAL; } urb_out = line6pcm->out.urbs[index]; urb_size = 0; /* TODO: this may not work for LINE6_ISO_PACKETS != 1 */ for (i = 0; i < LINE6_ISO_PACKETS; ++i) { /* compute frame size for given sampling rate */ int fsize = 0; struct usb_iso_packet_descriptor *fout = &urb_out->iso_frame_desc[i]; fsize = line6pcm->prev_fsize; if (fsize == 0) { int n; line6pcm->out.count += frame_increment; n = line6pcm->out.count / frame_factor; line6pcm->out.count -= n * frame_factor; fsize = n; } fsize *= bytes_per_frame; fout->offset = urb_size; fout->length = fsize; urb_size += fsize; } if (urb_size == 0) { /* can't determine URB size */ dev_err(line6pcm->line6->ifcdev, "driver bug: urb_size = 0\n"); return -EINVAL; } urb_frames = urb_size / bytes_per_frame; urb_out->transfer_buffer = line6pcm->out.buffer + index * LINE6_ISO_PACKETS * line6pcm->max_packet_size_out; urb_out->transfer_buffer_length = urb_size; urb_out->context = line6pcm; if (test_bit(LINE6_STREAM_PCM, &line6pcm->out.running) && !test_bit(LINE6_FLAG_PAUSE_PLAYBACK, &line6pcm->flags)) { struct snd_pcm_runtime *runtime = get_substream(line6pcm, SNDRV_PCM_STREAM_PLAYBACK)->runtime; if (line6pcm->out.pos + urb_frames > 
runtime->buffer_size) { /* The transferred area goes over buffer boundary, copy the data to the temp buffer. */ int len; len = runtime->buffer_size - line6pcm->out.pos; if (len > 0) { memcpy(urb_out->transfer_buffer, runtime->dma_area + line6pcm->out.pos * bytes_per_frame, len * bytes_per_frame); memcpy(urb_out->transfer_buffer + len * bytes_per_frame, runtime->dma_area, (urb_frames - len) * bytes_per_frame); } else dev_err(line6pcm->line6->ifcdev, "driver bug: len = %d\n", len); } else { memcpy(urb_out->transfer_buffer, runtime->dma_area + line6pcm->out.pos * bytes_per_frame, urb_out->transfer_buffer_length); } line6pcm->out.pos += urb_frames; if (line6pcm->out.pos >= runtime->buffer_size) line6pcm->out.pos -= runtime->buffer_size; change_volume(urb_out, line6pcm->volume_playback, bytes_per_frame); } else { memset(urb_out->transfer_buffer, 0, urb_out->transfer_buffer_length); } spin_lock_nested(&line6pcm->in.lock, SINGLE_DEPTH_NESTING); if (line6pcm->prev_fbuf) { if (test_bit(LINE6_STREAM_IMPULSE, &line6pcm->out.running)) { create_impulse_test_signal(line6pcm, urb_out, bytes_per_frame); if (test_bit(LINE6_STREAM_PCM, &line6pcm->in.running)) { line6_capture_copy(line6pcm, urb_out->transfer_buffer, urb_out-> transfer_buffer_length); line6_capture_check_period(line6pcm, urb_out->transfer_buffer_length); } } else { if (!(line6pcm->line6->properties->capabilities & LINE6_CAP_HWMON) && line6pcm->out.running && line6pcm->in.running) add_monitor_signal(urb_out, line6pcm->prev_fbuf, line6pcm->volume_monitor, bytes_per_frame); } line6pcm->prev_fbuf = NULL; line6pcm->prev_fsize = 0; } spin_unlock(&line6pcm->in.lock); ret = usb_submit_urb(urb_out, GFP_ATOMIC); if (ret == 0) set_bit(index, &line6pcm->out.active_urbs); else dev_err(line6pcm->line6->ifcdev, "URB out #%d submission failed (%d)\n", index, ret); return 0; } /* Submit all currently available playback URBs. must be called in line6pcm->out.lock context */ int line6_submit_audio_out_all_urbs(struct snd_line6_pcm *line6pcm) { int ret = 0, i; for (i = 0; i < line6pcm->line6->iso_buffers; ++i) { ret = submit_audio_out_urb(line6pcm); if (ret < 0) break; } return ret; } /* Callback for completed playback URB. 
*/ static void audio_out_callback(struct urb *urb) { int i, index, length = 0, shutdown = 0; unsigned long flags; struct snd_line6_pcm *line6pcm = (struct snd_line6_pcm *)urb->context; struct snd_pcm_substream *substream = get_substream(line6pcm, SNDRV_PCM_STREAM_PLAYBACK); const int bytes_per_frame = line6pcm->properties->bytes_per_channel * line6pcm->properties->playback_hw.channels_max; #if USE_CLEAR_BUFFER_WORKAROUND memset(urb->transfer_buffer, 0, urb->transfer_buffer_length); #endif line6pcm->out.last_frame = urb->start_frame; /* find index of URB */ for (index = 0; index < line6pcm->line6->iso_buffers; index++) if (urb == line6pcm->out.urbs[index]) break; if (index >= line6pcm->line6->iso_buffers) return; /* URB has been unlinked asynchronously */ for (i = 0; i < LINE6_ISO_PACKETS; i++) length += urb->iso_frame_desc[i].length; spin_lock_irqsave(&line6pcm->out.lock, flags); if (test_bit(LINE6_STREAM_PCM, &line6pcm->out.running)) { struct snd_pcm_runtime *runtime = substream->runtime; line6pcm->out.pos_done += length / bytes_per_frame; if (line6pcm->out.pos_done >= runtime->buffer_size) line6pcm->out.pos_done -= runtime->buffer_size; } clear_bit(index, &line6pcm->out.active_urbs); for (i = 0; i < LINE6_ISO_PACKETS; i++) if (urb->iso_frame_desc[i].status == -EXDEV) { shutdown = 1; break; } if (test_and_clear_bit(index, &line6pcm->out.unlink_urbs)) shutdown = 1; if (!shutdown) { submit_audio_out_urb(line6pcm); if (test_bit(LINE6_STREAM_PCM, &line6pcm->out.running)) { line6pcm->out.bytes += length; if (line6pcm->out.bytes >= line6pcm->out.period) { line6pcm->out.bytes %= line6pcm->out.period; spin_unlock(&line6pcm->out.lock); snd_pcm_period_elapsed(substream); spin_lock(&line6pcm->out.lock); } } } spin_unlock_irqrestore(&line6pcm->out.lock, flags); } /* open playback callback */ static int snd_line6_playback_open(struct snd_pcm_substream *substream) { int err; struct snd_pcm_runtime *runtime = substream->runtime; struct snd_line6_pcm *line6pcm = snd_pcm_substream_chip(substream); err = snd_pcm_hw_constraint_ratdens(runtime, 0, SNDRV_PCM_HW_PARAM_RATE, &line6pcm->properties->rates); if (err < 0) return err; runtime->hw = line6pcm->properties->playback_hw; return 0; } /* close playback callback */ static int snd_line6_playback_close(struct snd_pcm_substream *substream) { return 0; } /* playback operators */ const struct snd_pcm_ops snd_line6_playback_ops = { .open = snd_line6_playback_open, .close = snd_line6_playback_close, .hw_params = snd_line6_hw_params, .hw_free = snd_line6_hw_free, .prepare = snd_line6_prepare, .trigger = snd_line6_trigger, .pointer = snd_line6_pointer, }; int line6_create_audio_out_urbs(struct snd_line6_pcm *line6pcm) { struct usb_line6 *line6 = line6pcm->line6; int i; line6pcm->out.urbs = kcalloc(line6->iso_buffers, sizeof(struct urb *), GFP_KERNEL); if (line6pcm->out.urbs == NULL) return -ENOMEM; /* create audio URBs and fill in constant values: */ for (i = 0; i < line6->iso_buffers; ++i) { struct urb *urb; /* URB for audio out: */ urb = line6pcm->out.urbs[i] = usb_alloc_urb(LINE6_ISO_PACKETS, GFP_KERNEL); if (urb == NULL) return -ENOMEM; urb->dev = line6->usbdev; urb->pipe = usb_sndisocpipe(line6->usbdev, line6->properties->ep_audio_w & USB_ENDPOINT_NUMBER_MASK); urb->transfer_flags = URB_ISO_ASAP; urb->start_frame = -1; urb->number_of_packets = LINE6_ISO_PACKETS; urb->interval = LINE6_ISO_INTERVAL; urb->error_count = 0; urb->complete = audio_out_callback; if (usb_urb_ep_type_check(urb)) return -EINVAL; } return 0; }
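change_volume() above applies a software volume in Q8 fixed point: 256 is unity gain, so each sample is multiplied by the volume and shifted right by eight, then clamped to the sample range. A minimal stand-alone sketch of the same arithmetic for 16-bit samples (scale_s16() is illustrative, not part of the driver):

static short scale_s16(short sample, int volume)
{
	int val = (sample * volume) >> 8;

	if (val < -0x8000)
		val = -0x8000;
	else if (val > 0x7fff)
		val = 0x7fff;
	return (short)val;
}

/* scale_s16(0x4000, 128) == 0x2000: a volume of 128 halves the amplitude */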
52 45 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved. * Authors: David Chinner and Glauber Costa * * Generic LRU infrastructure */ #ifndef _LRU_LIST_H #define _LRU_LIST_H #include <linux/list.h> #include <linux/nodemask.h> #include <linux/shrinker.h> #include <linux/xarray.h> struct mem_cgroup; /* list_lru_walk_cb has to always return one of those */ enum lru_status { LRU_REMOVED, /* item removed from list */ LRU_REMOVED_RETRY, /* item removed, but lock has been dropped and reacquired */ LRU_ROTATE, /* item referenced, give another pass */ LRU_SKIP, /* item cannot be locked, skip */ LRU_RETRY, /* item not freeable. May drop the lock internally, but has to return locked. */ LRU_STOP, /* stop lru list walking. May drop the lock internally, but has to return locked. */ }; struct list_lru_one { struct list_head list; /* may become negative during memcg reparenting */ long nr_items; /* protects all fields above */ spinlock_t lock; }; struct list_lru_memcg { struct rcu_head rcu; /* array of per cgroup per node lists, indexed by node id */ struct list_lru_one node[]; }; struct list_lru_node { /* global list, used for the root cgroup in cgroup aware lrus */ struct list_lru_one lru; atomic_long_t nr_items; } ____cacheline_aligned_in_smp; struct list_lru { struct list_lru_node *node; #ifdef CONFIG_MEMCG struct list_head list; int shrinker_id; bool memcg_aware; struct xarray xa; #endif #ifdef CONFIG_LOCKDEP struct lock_class_key *key; #endif }; void list_lru_destroy(struct list_lru *lru); int __list_lru_init(struct list_lru *lru, bool memcg_aware, struct shrinker *shrinker); #define list_lru_init(lru) \ __list_lru_init((lru), false, NULL) #define list_lru_init_memcg(lru, shrinker) \ __list_lru_init((lru), true, shrinker) static inline int list_lru_init_memcg_key(struct list_lru *lru, struct shrinker *shrinker, struct lock_class_key *key) { #ifdef CONFIG_LOCKDEP lru->key = key; #endif return list_lru_init_memcg(lru, shrinker); } int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru, gfp_t gfp); void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent); /** * list_lru_add: add an element to the lru list's tail * @lru: the lru pointer * @item: the item to be added. * @nid: the node id of the sublist to add the item to. * @memcg: the cgroup of the sublist to add the item to. * * If the element is already part of a list, this function returns doing * nothing. 
This means that it is not necessary to keep state about whether or * not the element already belongs in the list. That said, this logic only * works if the item is in *this* list. If the item might be in some other * list, then you cannot rely on this check and you must remove it from the * other list before trying to insert it. * * The lru list consists of many sublists internally; the @nid and @memcg * parameters are used to determine which sublist to insert the item into. * It's important to use the right value of @nid and @memcg when deleting the * item, since it might otherwise get deleted from the wrong sublist. * * This also applies when attempting to insert the item multiple times - if * the item is currently in one sublist and you call list_lru_add() again, you * must pass the right @nid and @memcg parameters so that the same sublist is * used. * * You must ensure that the memcg is not freed during this call (e.g., with * rcu or by taking a css refcnt). * * Return: true if the list was updated, false otherwise */ bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid, struct mem_cgroup *memcg); /** * list_lru_add_obj: add an element to the lru list's tail * @lru: the lru pointer * @item: the item to be added. * * This function is similar to list_lru_add(), but the NUMA node and the * memcg of the sublist is determined by @item list_head. This assumption is * valid for slab objects LRU such as dentries, inodes, etc. * * Return: true if the list was updated, false otherwise */ bool list_lru_add_obj(struct list_lru *lru, struct list_head *item); /** * list_lru_del: delete an element from the lru list * @lru: the lru pointer * @item: the item to be deleted. * @nid: the node id of the sublist to delete the item from. * @memcg: the cgroup of the sublist to delete the item from. * * This function works analogously as list_lru_add() in terms of list * manipulation. * * The comments in list_lru_add() about an element already being in a list are * also valid for list_lru_del(), that is, you can delete an item that has * already been removed or never been added. However, if the item is in a * list, it must be in *this* list, and you must pass the right value of @nid * and @memcg so that the right sublist is used. * * You must ensure that the memcg is not freed during this call (e.g., with * rcu or by taking a css refcnt). When a memcg is deleted, list_lru entries * are automatically moved to the parent memcg. This is done in a race-free * way, so during deletion of an memcg both the old and new memcg will resolve * to the same sublist internally. * * Return: true if the list was updated, false otherwise */ bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid, struct mem_cgroup *memcg); /** * list_lru_del_obj: delete an element from the lru list * @lru: the lru pointer * @item: the item to be deleted. * * This function is similar to list_lru_del(), but the NUMA node and the * memcg of the sublist is determined by @item list_head. This assumption is * valid for slab objects LRU such as dentries, inodes, etc. * * Return: true if the list was updated, false otherwise. */ bool list_lru_del_obj(struct list_lru *lru, struct list_head *item); /** * list_lru_count_one: return the number of objects currently held by @lru * @lru: the lru pointer. * @nid: the node id to count from. * @memcg: the cgroup to count from. * * There is no guarantee that the list is not updated while the count is being * computed. 
Callers that want such a guarantee need to provide an outer lock. * * Return: 0 for empty lists, otherwise the number of objects * currently held by @lru. */ unsigned long list_lru_count_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg); unsigned long list_lru_count_node(struct list_lru *lru, int nid); static inline unsigned long list_lru_shrink_count(struct list_lru *lru, struct shrink_control *sc) { return list_lru_count_one(lru, sc->nid, sc->memcg); } static inline unsigned long list_lru_count(struct list_lru *lru) { long count = 0; int nid; for_each_node_state(nid, N_NORMAL_MEMORY) count += list_lru_count_node(lru, nid); return count; } void list_lru_isolate(struct list_lru_one *list, struct list_head *item); void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item, struct list_head *head); typedef enum lru_status (*list_lru_walk_cb)(struct list_head *item, struct list_lru_one *list, void *cb_arg); /** * list_lru_walk_one: walk a @lru, isolating and disposing freeable items. * @lru: the lru pointer. * @nid: the node id to scan from. * @memcg: the cgroup to scan from. * @isolate: callback function that is responsible for deciding what to do with * the item currently being scanned * @cb_arg: opaque type that will be passed to @isolate * @nr_to_walk: how many items to scan. * * This function will scan all elements in a particular @lru, calling the * @isolate callback for each of those items, along with the current list * spinlock and a caller-provided opaque. The @isolate callback can choose to * drop the lock internally, but *must* return with the lock held. The callback * will return an enum lru_status telling the @lru infrastructure what to * do with the object being scanned. * * Please note that @nr_to_walk does not mean how many objects will be freed, * just how many objects will be scanned. * * Return: the number of objects effectively removed from the LRU. */ unsigned long list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg, list_lru_walk_cb isolate, void *cb_arg, unsigned long *nr_to_walk); /** * list_lru_walk_one_irq: walk a @lru, isolating and disposing freeable items. * @lru: the lru pointer. * @nid: the node id to scan from. * @memcg: the cgroup to scan from. * @isolate: callback function that is responsible for deciding what to do with * the item currently being scanned * @cb_arg: opaque type that will be passed to @isolate * @nr_to_walk: how many items to scan. * * Same as list_lru_walk_one() except that the spinlock is acquired with * spin_lock_irq(). 
*/ unsigned long list_lru_walk_one_irq(struct list_lru *lru, int nid, struct mem_cgroup *memcg, list_lru_walk_cb isolate, void *cb_arg, unsigned long *nr_to_walk); unsigned long list_lru_walk_node(struct list_lru *lru, int nid, list_lru_walk_cb isolate, void *cb_arg, unsigned long *nr_to_walk); static inline unsigned long list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc, list_lru_walk_cb isolate, void *cb_arg) { return list_lru_walk_one(lru, sc->nid, sc->memcg, isolate, cb_arg, &sc->nr_to_scan); } static inline unsigned long list_lru_shrink_walk_irq(struct list_lru *lru, struct shrink_control *sc, list_lru_walk_cb isolate, void *cb_arg) { return list_lru_walk_one_irq(lru, sc->nid, sc->memcg, isolate, cb_arg, &sc->nr_to_scan); } static inline unsigned long list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate, void *cb_arg, unsigned long nr_to_walk) { long isolated = 0; int nid; for_each_node_state(nid, N_NORMAL_MEMORY) { isolated += list_lru_walk_node(lru, nid, isolate, cb_arg, &nr_to_walk); if (nr_to_walk <= 0) break; } return isolated; } #endif /* _LRU_LIST_H */
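The walk kernel-doc above defines the callback contract: it runs under the sublist lock, decides the fate of one item, and reports it through enum lru_status. A hypothetical shrinker built on list_lru_shrink_walk(); struct my_object, my_lru and my_object_free() are illustrative only:

struct my_object {
	struct list_head lru;		/* linked on my_lru */
};

static struct list_lru my_lru;

static enum lru_status my_isolate(struct list_head *item,
				  struct list_lru_one *list, void *cb_arg)
{
	struct list_head *freeable = cb_arg;

	/* called with the sublist lock held; defer freeing to the caller */
	list_lru_isolate_move(list, item, freeable);
	return LRU_REMOVED;
}

static unsigned long my_scan_objects(struct shrinker *shrink,
				     struct shrink_control *sc)
{
	LIST_HEAD(freeable);
	unsigned long freed;

	freed = list_lru_shrink_walk(&my_lru, sc, my_isolate, &freeable);

	/* dispose of the isolated objects outside the lru lock */
	while (!list_empty(&freeable)) {
		struct my_object *obj =
			list_first_entry(&freeable, struct my_object, lru);

		list_del_init(&obj->lru);
		my_object_free(obj);	/* hypothetical destructor */
	}
	return freed;
}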
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 // SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for Gembird Joypad, "PC Game Controller" * * Copyright (c) 2015 Red Hat, Inc * Copyright (c) 2015 Benjamin Tissoires */ /* */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" #define GEMBIRD_START_FAULTY_RDESC 8 static const __u8 gembird_jpd_faulty_rdesc[] = { 0x75, 0x08, /* Report Size (8) */ 0x95, 0x05, /* Report Count (5) */ 0x15, 0x00, /* Logical Minimum (0) */ 0x26, 0xff, 0x00, /* Logical Maximum (255) */ 0x35, 0x00, /* Physical Minimum (0) */ 0x46, 0xff, 0x00, /* Physical Maximum (255) */ 0x09, 0x30, /* Usage (X) */ 0x09, 0x31, /* Usage (Y) */ 0x09, 0x32, /* Usage (Z) */ 0x09, 0x32, /* Usage (Z) */ 0x09, 0x35, /* Usage (Rz) */ 0x81, 0x02, /* Input (Data,Var,Abs) */ }; /* * we fix the report descriptor by: * - marking the first Z axis as constant (so it is ignored by HID) * - assign the original second Z to Rx * - assign the original Rz to Ry */ static const __u8 gembird_jpd_fixed_rdesc[] = { 0x75, 0x08, /* Report Size (8) */ 0x95, 0x02, /* Report Count (2) */ 0x15, 0x00, /* Logical Minimum (0) */ 0x26, 0xff, 0x00, /* Logical Maximum (255) */ 0x35, 0x00, /* Physical Minimum (0) */ 0x46, 0xff, 0x00, /* Physical Maximum (255) */ 0x09, 0x30, /* Usage (X) */ 0x09, 0x31, /* Usage (Y) */ 0x81, 0x02, /* Input (Data,Var,Abs) */ 0x95, 0x01, /* Report Count (1) */ 0x09, 0x32, /* Usage (Z) */ 0x81, 0x01, /* Input (Cnst,Arr,Abs) */ 0x95, 0x02, /* Report Count (2) */ 0x09, 0x33, /* Usage (Rx) */ 0x09, 0x34, /* Usage (Ry) */ 0x81, 0x02, /* Input (Data,Var,Abs) */ }; static const __u8 *gembird_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { __u8 *new_rdesc; /* delta_size is > 0 */ size_t delta_size = sizeof(gembird_jpd_fixed_rdesc) - sizeof(gembird_jpd_faulty_rdesc); size_t new_size = *rsize + delta_size; if (*rsize >= 31 && !memcmp(&rdesc[GEMBIRD_START_FAULTY_RDESC], gembird_jpd_faulty_rdesc, sizeof(gembird_jpd_faulty_rdesc))) { new_rdesc = devm_kzalloc(&hdev->dev, new_size, GFP_KERNEL); if (new_rdesc == NULL) return rdesc; dev_info(&hdev->dev, "fixing Gembird JPD-DualForce 2 report descriptor.\n"); /* start by copying the end of the rdesc */ memcpy(new_rdesc + delta_size, rdesc, *rsize); /* add the correct beginning */ memcpy(new_rdesc, rdesc, GEMBIRD_START_FAULTY_RDESC); /* replace the faulty part with the fixed one */ memcpy(new_rdesc + GEMBIRD_START_FAULTY_RDESC, gembird_jpd_fixed_rdesc, sizeof(gembird_jpd_fixed_rdesc)); *rsize = new_size; rdesc = new_rdesc; } return rdesc; } static const struct hid_device_id gembird_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_GEMBIRD, USB_DEVICE_ID_GEMBIRD_JPD_DUALFORCE2) }, { } }; MODULE_DEVICE_TABLE(hid, gembird_devices); static struct hid_driver gembird_driver = { .name = "gembird", .id_table = gembird_devices, .report_fixup = gembird_report_fixup, }; module_hid_driver(gembird_driver); MODULE_AUTHOR("Benjamin Tissoires <benjamin.tissoires@gmail.com>"); MODULE_DESCRIPTION("HID Gembird joypad driver"); MODULE_LICENSE("GPL");
// SPDX-License-Identifier: GPL-2.0-only
/*
 * V9FS cache definitions.
 *
 * Copyright (C) 2009 by Abhishek Kulkarni <adkulkar@umail.iu.edu>
 */

#include <linux/jiffies.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <net/9p/9p.h>

#include "v9fs.h"
#include "cache.h"

int v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses,
				  const char *dev_name)
{
	struct fscache_volume *vcookie;
	char *name, *p;

	name = kasprintf(GFP_KERNEL, "9p,%s,%s",
			 dev_name, v9ses->cachetag ?: v9ses->aname);
	if (!name)
		return -ENOMEM;

	for (p = name; *p; p++)
		if (*p == '/')
			*p = ';';

	vcookie = fscache_acquire_volume(name, NULL, NULL, 0);
	p9_debug(P9_DEBUG_FSC, "session %p get volume %p (%s)\n",
		 v9ses, vcookie, name);
	if (IS_ERR(vcookie)) {
		if (vcookie != ERR_PTR(-EBUSY)) {
			kfree(name);
			return PTR_ERR(vcookie);
		}
		pr_err("Cache volume key already in use (%s)\n", name);
		vcookie = NULL;
	}
	v9ses->fscache = vcookie;
	kfree(name);
	return 0;
}

void v9fs_cache_inode_get_cookie(struct inode *inode)
{
	struct v9fs_inode *v9inode = V9FS_I(inode);
	struct v9fs_session_info *v9ses;
	__le32 version;
	__le64 path;

	if (!S_ISREG(inode->i_mode))
		return;
	if (WARN_ON(v9fs_inode_cookie(v9inode)))
		return;

	version = cpu_to_le32(v9inode->qid.version);
	path = cpu_to_le64(v9inode->qid.path);
	v9ses = v9fs_inode2v9ses(inode);
	v9inode->netfs.cache =
		fscache_acquire_cookie(v9fs_session_cache(v9ses),
				       0,
				       &path, sizeof(path),
				       &version, sizeof(version),
				       i_size_read(&v9inode->netfs.inode));
	if (v9inode->netfs.cache)
		mapping_set_release_always(inode->i_mapping);

	p9_debug(P9_DEBUG_FSC, "inode %p get cookie %p\n",
		 inode, v9fs_inode_cookie(v9inode));
}
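v9fs_cache_session_get_cookie() above derives the fscache volume key from the device name and the cache tag (or aname), then maps '/' to ';' so the key contains no path separators. A small illustration of the resulting format, outside the kernel:

#include <stdio.h>
#include <string.h>

static void show_volume_key(const char *dev_name, const char *aname)
{
	char key[256];
	char *p;

	snprintf(key, sizeof(key), "9p,%s,%s", dev_name, aname);
	for (p = key; *p; p++)
		if (*p == '/')
			*p = ';';
	/* e.g. dev "10.0.0.1", aname "/export/home" -> "9p,10.0.0.1,;export;home" */
	printf("%s\n", key);
}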
// SPDX-License-Identifier: GPL-2.0+
/*
 * MCT (Magic Control Technology Corp.) USB RS232 Converter Driver
 *
 * Copyright (C) 2000 Wolfgang Grandegger (wolfgang@ces.ch)
 *
 * This program is largely derived from the Belkin USB Serial Adapter Driver
 * (see belkin_sa.[ch]). All of the information about the device was acquired
 * by using SniffUSB on Windows98. For technical details see mct_u232.h.
 *
 * William G. Greathouse and Greg Kroah-Hartman provided great help on how to
 * do the reverse engineering and how to write a USB serial device driver.
* * TO BE DONE, TO BE CHECKED: * DTR/RTS signal handling may be incomplete or incorrect. I have mainly * implemented what I have seen with SniffUSB or found in belkin_sa.c. * For further TODOs check also belkin_sa.c. */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/uaccess.h> #include <linux/unaligned.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/serial.h> #include "mct_u232.h" #define DRIVER_AUTHOR "Wolfgang Grandegger <wolfgang@ces.ch>" #define DRIVER_DESC "Magic Control Technology USB-RS232 converter driver" /* * Function prototypes */ static int mct_u232_port_probe(struct usb_serial_port *port); static void mct_u232_port_remove(struct usb_serial_port *remove); static int mct_u232_open(struct tty_struct *tty, struct usb_serial_port *port); static void mct_u232_close(struct usb_serial_port *port); static void mct_u232_dtr_rts(struct usb_serial_port *port, int on); static void mct_u232_read_int_callback(struct urb *urb); static void mct_u232_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios); static int mct_u232_break_ctl(struct tty_struct *tty, int break_state); static int mct_u232_tiocmget(struct tty_struct *tty); static int mct_u232_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear); static void mct_u232_throttle(struct tty_struct *tty); static void mct_u232_unthrottle(struct tty_struct *tty); /* * All of the device info needed for the MCT USB-RS232 converter. */ static const struct usb_device_id id_table[] = { { USB_DEVICE(MCT_U232_VID, MCT_U232_PID) }, { USB_DEVICE(MCT_U232_VID, MCT_U232_SITECOM_PID) }, { USB_DEVICE(MCT_U232_VID, MCT_U232_DU_H3SP_PID) }, { USB_DEVICE(MCT_U232_BELKIN_F5U109_VID, MCT_U232_BELKIN_F5U109_PID) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, id_table); static struct usb_serial_driver mct_u232_device = { .driver = { .name = "mct_u232", }, .description = "MCT U232", .id_table = id_table, .num_ports = 1, .open = mct_u232_open, .close = mct_u232_close, .dtr_rts = mct_u232_dtr_rts, .throttle = mct_u232_throttle, .unthrottle = mct_u232_unthrottle, .read_int_callback = mct_u232_read_int_callback, .set_termios = mct_u232_set_termios, .break_ctl = mct_u232_break_ctl, .tiocmget = mct_u232_tiocmget, .tiocmset = mct_u232_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .port_probe = mct_u232_port_probe, .port_remove = mct_u232_port_remove, .get_icount = usb_serial_generic_get_icount, }; static struct usb_serial_driver * const serial_drivers[] = { &mct_u232_device, NULL }; struct mct_u232_private { struct urb *read_urb; spinlock_t lock; unsigned int control_state; /* Modem Line Setting (TIOCM) */ unsigned char last_lcr; /* Line Control Register */ unsigned char last_lsr; /* Line Status Register */ unsigned char last_msr; /* Modem Status Register */ unsigned int rx_flags; /* Throttling flags */ }; #define THROTTLED 0x01 /* * Handle vendor specific USB requests */ #define WDR_TIMEOUT 5000 /* default urb timeout */ /* * Later day 2.6.0-test kernels have new baud rates like B230400 which * we do not know how to support. We ignore them for the moment. 
*/ static int mct_u232_calculate_baud_rate(struct usb_serial *serial, speed_t value, speed_t *result) { *result = value; if (le16_to_cpu(serial->dev->descriptor.idProduct) == MCT_U232_SITECOM_PID || le16_to_cpu(serial->dev->descriptor.idProduct) == MCT_U232_BELKIN_F5U109_PID) { switch (value) { case 300: return 0x01; case 600: return 0x02; /* this one not tested */ case 1200: return 0x03; case 2400: return 0x04; case 4800: return 0x06; case 9600: return 0x08; case 19200: return 0x09; case 38400: return 0x0a; case 57600: return 0x0b; case 115200: return 0x0c; default: *result = 9600; return 0x08; } } else { /* FIXME: Can we use any divider - should we do divider = 115200/value; real baud = 115200/divider */ switch (value) { case 300: break; case 600: break; case 1200: break; case 2400: break; case 4800: break; case 9600: break; case 19200: break; case 38400: break; case 57600: break; case 115200: break; default: value = 9600; *result = 9600; } return 115200/value; } } static int mct_u232_set_baud_rate(struct tty_struct *tty, struct usb_serial *serial, struct usb_serial_port *port, speed_t value) { unsigned int divisor; int rc; unsigned char *buf; unsigned char cts_enable_byte = 0; speed_t speed; buf = kmalloc(MCT_U232_MAX_SIZE, GFP_KERNEL); if (buf == NULL) return -ENOMEM; divisor = mct_u232_calculate_baud_rate(serial, value, &speed); put_unaligned_le32(divisor, buf); rc = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), MCT_U232_SET_BAUD_RATE_REQUEST, MCT_U232_SET_REQUEST_TYPE, 0, 0, buf, MCT_U232_SET_BAUD_RATE_SIZE, WDR_TIMEOUT); if (rc < 0) /*FIXME: What value speed results */ dev_err(&port->dev, "Set BAUD RATE %d failed (error = %d)\n", value, rc); else tty_encode_baud_rate(tty, speed, speed); dev_dbg(&port->dev, "set_baud_rate: value: 0x%x, divisor: 0x%x\n", value, divisor); /* Mimic the MCT-supplied Windows driver (version 1.21P.0104), which always sends two extra USB 'device request' messages after the 'baud rate change' message. The actual functionality of the request codes in these messages is not fully understood but these particular codes are never seen in any operation besides a baud rate change. Both of these messages send a single byte of data. In the first message, the value of this byte is always zero. The second message has been determined experimentally to control whether data will be transmitted to a device which is not asserting the 'CTS' signal. If the second message's data byte is zero, data will be transmitted even if 'CTS' is not asserted (i.e. no hardware flow control). if the second message's data byte is nonzero (a value of 1 is used by this driver), data will not be transmitted to a device which is not asserting 'CTS'. 
*/ buf[0] = 0; rc = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), MCT_U232_SET_UNKNOWN1_REQUEST, MCT_U232_SET_REQUEST_TYPE, 0, 0, buf, MCT_U232_SET_UNKNOWN1_SIZE, WDR_TIMEOUT); if (rc < 0) dev_err(&port->dev, "Sending USB device request code %d " "failed (error = %d)\n", MCT_U232_SET_UNKNOWN1_REQUEST, rc); if (port && C_CRTSCTS(tty)) cts_enable_byte = 1; dev_dbg(&port->dev, "set_baud_rate: send second control message, data = %02X\n", cts_enable_byte); buf[0] = cts_enable_byte; rc = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), MCT_U232_SET_CTS_REQUEST, MCT_U232_SET_REQUEST_TYPE, 0, 0, buf, MCT_U232_SET_CTS_SIZE, WDR_TIMEOUT); if (rc < 0) dev_err(&port->dev, "Sending USB device request code %d " "failed (error = %d)\n", MCT_U232_SET_CTS_REQUEST, rc); kfree(buf); return rc; } /* mct_u232_set_baud_rate */ static int mct_u232_set_line_ctrl(struct usb_serial_port *port, unsigned char lcr) { int rc; unsigned char *buf; buf = kmalloc(MCT_U232_MAX_SIZE, GFP_KERNEL); if (buf == NULL) return -ENOMEM; buf[0] = lcr; rc = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), MCT_U232_SET_LINE_CTRL_REQUEST, MCT_U232_SET_REQUEST_TYPE, 0, 0, buf, MCT_U232_SET_LINE_CTRL_SIZE, WDR_TIMEOUT); if (rc < 0) dev_err(&port->dev, "Set LINE CTRL 0x%x failed (error = %d)\n", lcr, rc); dev_dbg(&port->dev, "set_line_ctrl: 0x%x\n", lcr); kfree(buf); return rc; } /* mct_u232_set_line_ctrl */ static int mct_u232_set_modem_ctrl(struct usb_serial_port *port, unsigned int control_state) { int rc; unsigned char mcr; unsigned char *buf; buf = kmalloc(MCT_U232_MAX_SIZE, GFP_KERNEL); if (buf == NULL) return -ENOMEM; mcr = MCT_U232_MCR_NONE; if (control_state & TIOCM_DTR) mcr |= MCT_U232_MCR_DTR; if (control_state & TIOCM_RTS) mcr |= MCT_U232_MCR_RTS; buf[0] = mcr; rc = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), MCT_U232_SET_MODEM_CTRL_REQUEST, MCT_U232_SET_REQUEST_TYPE, 0, 0, buf, MCT_U232_SET_MODEM_CTRL_SIZE, WDR_TIMEOUT); kfree(buf); dev_dbg(&port->dev, "set_modem_ctrl: state=0x%x ==> mcr=0x%x\n", control_state, mcr); if (rc < 0) { dev_err(&port->dev, "Set MODEM CTRL 0x%x failed (error = %d)\n", mcr, rc); return rc; } return 0; } /* mct_u232_set_modem_ctrl */ static int mct_u232_get_modem_stat(struct usb_serial_port *port, unsigned char *msr) { int rc; unsigned char *buf; buf = kmalloc(MCT_U232_MAX_SIZE, GFP_KERNEL); if (buf == NULL) { *msr = 0; return -ENOMEM; } rc = usb_control_msg(port->serial->dev, usb_rcvctrlpipe(port->serial->dev, 0), MCT_U232_GET_MODEM_STAT_REQUEST, MCT_U232_GET_REQUEST_TYPE, 0, 0, buf, MCT_U232_GET_MODEM_STAT_SIZE, WDR_TIMEOUT); if (rc < MCT_U232_GET_MODEM_STAT_SIZE) { dev_err(&port->dev, "Get MODEM STATus failed (error = %d)\n", rc); if (rc >= 0) rc = -EIO; *msr = 0; } else { *msr = buf[0]; } dev_dbg(&port->dev, "get_modem_stat: 0x%x\n", *msr); kfree(buf); return rc; } /* mct_u232_get_modem_stat */ static void mct_u232_msr_to_icount(struct async_icount *icount, unsigned char msr) { /* Translate Control Line states */ if (msr & MCT_U232_MSR_DDSR) icount->dsr++; if (msr & MCT_U232_MSR_DCTS) icount->cts++; if (msr & MCT_U232_MSR_DRI) icount->rng++; if (msr & MCT_U232_MSR_DCD) icount->dcd++; } /* mct_u232_msr_to_icount */ static void mct_u232_msr_to_state(struct usb_serial_port *port, unsigned int *control_state, unsigned char msr) { /* Translate Control Line states */ if (msr & MCT_U232_MSR_DSR) *control_state |= TIOCM_DSR; else *control_state &= ~TIOCM_DSR; if (msr & MCT_U232_MSR_CTS) *control_state |= TIOCM_CTS; else 
*control_state &= ~TIOCM_CTS; if (msr & MCT_U232_MSR_RI) *control_state |= TIOCM_RI; else *control_state &= ~TIOCM_RI; if (msr & MCT_U232_MSR_CD) *control_state |= TIOCM_CD; else *control_state &= ~TIOCM_CD; dev_dbg(&port->dev, "msr_to_state: msr=0x%x ==> state=0x%x\n", msr, *control_state); } /* mct_u232_msr_to_state */ /* * Driver's tty interface functions */ static int mct_u232_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct mct_u232_private *priv; /* check first to simplify error handling */ if (!serial->port[1] || !serial->port[1]->interrupt_in_urb) { dev_err(&port->dev, "expected endpoint missing\n"); return -ENODEV; } priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; /* Use second interrupt-in endpoint for reading. */ priv->read_urb = serial->port[1]->interrupt_in_urb; priv->read_urb->context = port; spin_lock_init(&priv->lock); usb_set_serial_port_data(port, priv); return 0; } static void mct_u232_port_remove(struct usb_serial_port *port) { struct mct_u232_private *priv; priv = usb_get_serial_port_data(port); kfree(priv); } static int mct_u232_open(struct tty_struct *tty, struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct mct_u232_private *priv = usb_get_serial_port_data(port); int retval = 0; unsigned int control_state; unsigned long flags; unsigned char last_lcr; unsigned char last_msr; /* Compensate for a hardware bug: although the Sitecom U232-P25 * device reports a maximum output packet size of 32 bytes, * it seems to be able to accept only 16 bytes (and that's what * SniffUSB says too...) */ if (le16_to_cpu(serial->dev->descriptor.idProduct) == MCT_U232_SITECOM_PID) port->bulk_out_size = 16; /* Do a defined restart: the normal serial device seems to * always turn on DTR and RTS here, so do the same. I'm not * sure if this is really necessary. But it should not harm * either. 
*/ spin_lock_irqsave(&priv->lock, flags); if (tty && C_BAUD(tty)) priv->control_state = TIOCM_DTR | TIOCM_RTS; else priv->control_state = 0; priv->last_lcr = (MCT_U232_DATA_BITS_8 | MCT_U232_PARITY_NONE | MCT_U232_STOP_BITS_1); control_state = priv->control_state; last_lcr = priv->last_lcr; spin_unlock_irqrestore(&priv->lock, flags); mct_u232_set_modem_ctrl(port, control_state); mct_u232_set_line_ctrl(port, last_lcr); /* Read modem status and update control state */ mct_u232_get_modem_stat(port, &last_msr); spin_lock_irqsave(&priv->lock, flags); priv->last_msr = last_msr; mct_u232_msr_to_state(port, &priv->control_state, priv->last_msr); spin_unlock_irqrestore(&priv->lock, flags); retval = usb_submit_urb(priv->read_urb, GFP_KERNEL); if (retval) { dev_err(&port->dev, "usb_submit_urb(read) failed pipe 0x%x err %d\n", port->read_urb->pipe, retval); goto error; } retval = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); if (retval) { usb_kill_urb(priv->read_urb); dev_err(&port->dev, "usb_submit_urb(read int) failed pipe 0x%x err %d", port->interrupt_in_urb->pipe, retval); goto error; } return 0; error: return retval; } /* mct_u232_open */ static void mct_u232_dtr_rts(struct usb_serial_port *port, int on) { unsigned int control_state; struct mct_u232_private *priv = usb_get_serial_port_data(port); spin_lock_irq(&priv->lock); if (on) priv->control_state |= TIOCM_DTR | TIOCM_RTS; else priv->control_state &= ~(TIOCM_DTR | TIOCM_RTS); control_state = priv->control_state; spin_unlock_irq(&priv->lock); mct_u232_set_modem_ctrl(port, control_state); } static void mct_u232_close(struct usb_serial_port *port) { struct mct_u232_private *priv = usb_get_serial_port_data(port); usb_kill_urb(priv->read_urb); usb_kill_urb(port->interrupt_in_urb); usb_serial_generic_close(port); } /* mct_u232_close */ static void mct_u232_read_int_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; struct mct_u232_private *priv = usb_get_serial_port_data(port); unsigned char *data = urb->transfer_buffer; int retval; int status = urb->status; unsigned long flags; switch (status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(&port->dev, "%s - urb shutting down with status: %d\n", __func__, status); return; default: dev_dbg(&port->dev, "%s - nonzero urb status received: %d\n", __func__, status); goto exit; } usb_serial_debug_data(&port->dev, __func__, urb->actual_length, data); /* * Work-a-round: handle the 'usual' bulk-in pipe here */ if (urb->transfer_buffer_length > 2) { if (urb->actual_length) { tty_insert_flip_string(&port->port, data, urb->actual_length); tty_flip_buffer_push(&port->port); } goto exit; } /* * The interrupt-in pipe signals exceptional conditions (modem line * signal changes and errors). data[0] holds MSR, data[1] holds LSR. */ spin_lock_irqsave(&priv->lock, flags); priv->last_msr = data[MCT_U232_MSR_INDEX]; /* Record Control Line states */ mct_u232_msr_to_state(port, &priv->control_state, priv->last_msr); mct_u232_msr_to_icount(&port->icount, priv->last_msr); #if 0 /* Not yet handled. See belkin_sa.c for further information */ /* Now to report any errors */ priv->last_lsr = data[MCT_U232_LSR_INDEX]; /* * fill in the flip buffer here, but I do not know the relation * to the current/next receive buffer or characters. I need * to look in to this before committing any code. 
*/ if (priv->last_lsr & MCT_U232_LSR_ERR) { tty = tty_port_tty_get(&port->port); /* Overrun Error */ if (priv->last_lsr & MCT_U232_LSR_OE) { } /* Parity Error */ if (priv->last_lsr & MCT_U232_LSR_PE) { } /* Framing Error */ if (priv->last_lsr & MCT_U232_LSR_FE) { } /* Break Indicator */ if (priv->last_lsr & MCT_U232_LSR_BI) { } tty_kref_put(tty); } #endif wake_up_interruptible(&port->port.delta_msr_wait); spin_unlock_irqrestore(&priv->lock, flags); exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(&port->dev, "%s - usb_submit_urb failed with result %d\n", __func__, retval); } /* mct_u232_read_int_callback */ static void mct_u232_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct usb_serial *serial = port->serial; struct mct_u232_private *priv = usb_get_serial_port_data(port); struct ktermios *termios = &tty->termios; unsigned int cflag = termios->c_cflag; unsigned int old_cflag = old_termios->c_cflag; unsigned long flags; unsigned int control_state; unsigned char last_lcr; /* get a local copy of the current port settings */ spin_lock_irqsave(&priv->lock, flags); control_state = priv->control_state; spin_unlock_irqrestore(&priv->lock, flags); last_lcr = 0; /* * Update baud rate. * Do not attempt to cache old rates and skip settings, * disconnects screw such tricks up completely. * Premature optimization is the root of all evil. */ /* reassert DTR and RTS on transition from B0 */ if ((old_cflag & CBAUD) == B0) { dev_dbg(&port->dev, "%s: baud was B0\n", __func__); control_state |= TIOCM_DTR | TIOCM_RTS; mct_u232_set_modem_ctrl(port, control_state); } mct_u232_set_baud_rate(tty, serial, port, tty_get_baud_rate(tty)); if ((cflag & CBAUD) == B0) { dev_dbg(&port->dev, "%s: baud is B0\n", __func__); /* Drop RTS and DTR */ control_state &= ~(TIOCM_DTR | TIOCM_RTS); mct_u232_set_modem_ctrl(port, control_state); } /* * Update line control register (LCR) */ /* set the parity */ if (cflag & PARENB) last_lcr |= (cflag & PARODD) ? MCT_U232_PARITY_ODD : MCT_U232_PARITY_EVEN; else last_lcr |= MCT_U232_PARITY_NONE; /* set the number of data bits */ switch (cflag & CSIZE) { case CS5: last_lcr |= MCT_U232_DATA_BITS_5; break; case CS6: last_lcr |= MCT_U232_DATA_BITS_6; break; case CS7: last_lcr |= MCT_U232_DATA_BITS_7; break; case CS8: last_lcr |= MCT_U232_DATA_BITS_8; break; default: dev_err(&port->dev, "CSIZE was not CS5-CS8, using default of 8\n"); last_lcr |= MCT_U232_DATA_BITS_8; break; } termios->c_cflag &= ~CMSPAR; /* set the number of stop bits */ last_lcr |= (cflag & CSTOPB) ? 
MCT_U232_STOP_BITS_2 : MCT_U232_STOP_BITS_1; mct_u232_set_line_ctrl(port, last_lcr); /* save off the modified port settings */ spin_lock_irqsave(&priv->lock, flags); priv->control_state = control_state; priv->last_lcr = last_lcr; spin_unlock_irqrestore(&priv->lock, flags); } /* mct_u232_set_termios */ static int mct_u232_break_ctl(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; struct mct_u232_private *priv = usb_get_serial_port_data(port); unsigned char lcr; unsigned long flags; spin_lock_irqsave(&priv->lock, flags); lcr = priv->last_lcr; if (break_state) lcr |= MCT_U232_SET_BREAK; spin_unlock_irqrestore(&priv->lock, flags); return mct_u232_set_line_ctrl(port, lcr); } /* mct_u232_break_ctl */ static int mct_u232_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct mct_u232_private *priv = usb_get_serial_port_data(port); unsigned int control_state; unsigned long flags; spin_lock_irqsave(&priv->lock, flags); control_state = priv->control_state; spin_unlock_irqrestore(&priv->lock, flags); return control_state; } static int mct_u232_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct mct_u232_private *priv = usb_get_serial_port_data(port); unsigned int control_state; unsigned long flags; spin_lock_irqsave(&priv->lock, flags); control_state = priv->control_state; if (set & TIOCM_RTS) control_state |= TIOCM_RTS; if (set & TIOCM_DTR) control_state |= TIOCM_DTR; if (clear & TIOCM_RTS) control_state &= ~TIOCM_RTS; if (clear & TIOCM_DTR) control_state &= ~TIOCM_DTR; priv->control_state = control_state; spin_unlock_irqrestore(&priv->lock, flags); return mct_u232_set_modem_ctrl(port, control_state); } static void mct_u232_throttle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct mct_u232_private *priv = usb_get_serial_port_data(port); unsigned int control_state; spin_lock_irq(&priv->lock); priv->rx_flags |= THROTTLED; if (C_CRTSCTS(tty)) { priv->control_state &= ~TIOCM_RTS; control_state = priv->control_state; spin_unlock_irq(&priv->lock); mct_u232_set_modem_ctrl(port, control_state); } else { spin_unlock_irq(&priv->lock); } } static void mct_u232_unthrottle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct mct_u232_private *priv = usb_get_serial_port_data(port); unsigned int control_state; spin_lock_irq(&priv->lock); if ((priv->rx_flags & THROTTLED) && C_CRTSCTS(tty)) { priv->rx_flags &= ~THROTTLED; priv->control_state |= TIOCM_RTS; control_state = priv->control_state; spin_unlock_irq(&priv->lock); mct_u232_set_modem_ctrl(port, control_state); } else { spin_unlock_irq(&priv->lock); } } module_usb_serial_driver(serial_drivers, id_table); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
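Illustration only: a stand-alone sketch of the divisor arithmetic that mct_u232_calculate_baud_rate() above uses for the generic (non-Sitecom, non-Belkin) product IDs, where the divisor sent to the device is 115200 divided by the requested rate and unknown rates fall back to 9600. mct_generic_divisor() is a hypothetical helper; the real driver additionally returns fixed rate codes for the Sitecom and Belkin devices.

#include <stdio.h>

static unsigned int mct_generic_divisor(unsigned int requested,
					unsigned int *actual)
{
	switch (requested) {
	case 300: case 600: case 1200: case 2400: case 4800:
	case 9600: case 19200: case 38400: case 57600: case 115200:
		*actual = requested;
		break;
	default:
		/* unknown rates fall back to 9600, as in the driver */
		requested = 9600;
		*actual = 9600;
	}
	return 115200 / requested;	/* divisor written to the device */
}

int main(void)
{
	unsigned int actual;
	unsigned int div = mct_generic_divisor(38400, &actual);

	printf("divisor=%u actual=%u\n", div, actual);	/* divisor=3 actual=38400 */
	return 0;
}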
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* SCTP kernel implementation
 * (C) Copyright IBM Corp. 2001, 2004
 * Copyright (c) 1999-2000 Cisco, Inc.
 * Copyright (c) 1999-2001 Motorola, Inc.
 * Copyright (c) 2001-2003 Intel Corp.
 *
 * This file is part of the SCTP kernel implementation
 *
 * The base lksctp header.
 *
 * Please send any bug reports or fixes you make to the
 * email address(es):
 *    lksctp developers <linux-sctp@vger.kernel.org>
 *
 * Written or modified by:
 *    La Monte H.P. Yarroll <piggy@acm.org>
 *    Xingang Guo <xingang.guo@intel.com>
 *    Jon Grimm <jgrimm@us.ibm.com>
 *    Daisy Chang <daisyc@us.ibm.com>
 *    Sridhar Samudrala <sri@us.ibm.com>
 *    Ardelle Fan <ardelle.fan@intel.com>
 *    Ryan Layer <rmlayer@us.ibm.com>
 *    Kevin Gao <kevin.gao@intel.com>
 */

#ifndef __net_sctp_h__
#define __net_sctp_h__

/* Header Strategy.
* Start getting some control over the header file dependencies: * includes * constants * structs * prototypes * macros, externs, and inlines * * Move test_frame specific items out of the kernel headers * and into the test frame headers. This is not perfect in any sense * and will continue to evolve. */ #include <linux/types.h> #include <linux/slab.h> #include <linux/in.h> #include <linux/tty.h> #include <linux/proc_fs.h> #include <linux/spinlock.h> #include <linux/jiffies.h> #include <linux/idr.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/ip6_route.h> #endif #include <linux/uaccess.h> #include <asm/page.h> #include <net/sock.h> #include <net/snmp.h> #include <net/sctp/structs.h> #include <net/sctp/constants.h> #ifdef CONFIG_IP_SCTP_MODULE #define SCTP_PROTOSW_FLAG 0 #else /* static! */ #define SCTP_PROTOSW_FLAG INET_PROTOSW_PERMANENT #endif /* * Function declarations. */ /* * sctp/protocol.c */ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *addr, enum sctp_scope, gfp_t gfp, int flags); struct sctp_pf *sctp_get_pf_specific(sa_family_t family); int sctp_register_pf(struct sctp_pf *, sa_family_t); void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int); int sctp_udp_sock_start(struct net *net); void sctp_udp_sock_stop(struct net *net); /* * sctp/socket.c */ int sctp_inet_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int addr_len, int flags); int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb); int sctp_inet_listen(struct socket *sock, int backlog); void sctp_write_space(struct sock *sk); void sctp_data_ready(struct sock *sk); __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait); void sctp_sock_rfree(struct sk_buff *skb); extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int *); typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_idx(struct net *net, struct rhashtable_iter *iter, int pos); int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, void *p, int dif); int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, struct sctp_info *info); /* * sctp/primitive.c */ int sctp_primitive_ASSOCIATE(struct net *, struct sctp_association *, void *arg); int sctp_primitive_SHUTDOWN(struct net *, struct sctp_association *, void *arg); int sctp_primitive_ABORT(struct net *, struct sctp_association *, void *arg); int sctp_primitive_SEND(struct net *, struct sctp_association *, void *arg); int sctp_primitive_REQUESTHEARTBEAT(struct net *, struct sctp_association *, void *arg); int sctp_primitive_ASCONF(struct net *, struct sctp_association *, void *arg); int sctp_primitive_RECONF(struct net *net, struct sctp_association *asoc, void *arg); /* * sctp/input.c */ int sctp_rcv(struct sk_buff *skb); int sctp_v4_err(struct sk_buff *skb, u32 info); int sctp_hash_endpoint(struct sctp_endpoint *ep); void 
sctp_unhash_endpoint(struct sctp_endpoint *); struct sock *sctp_err_lookup(struct net *net, int family, struct sk_buff *, struct sctphdr *, struct sctp_association **, struct sctp_transport **); void sctp_err_finish(struct sock *, struct sctp_transport *); int sctp_udp_v4_err(struct sock *sk, struct sk_buff *skb); int sctp_udp_v6_err(struct sock *sk, struct sk_buff *skb); void sctp_icmp_frag_needed(struct sock *, struct sctp_association *, struct sctp_transport *t, __u32 pmtu); void sctp_icmp_redirect(struct sock *, struct sctp_transport *, struct sk_buff *); void sctp_icmp_proto_unreachable(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t); int sctp_transport_hashtable_init(void); void sctp_transport_hashtable_destroy(void); int sctp_hash_transport(struct sctp_transport *t); void sctp_unhash_transport(struct sctp_transport *t); struct sctp_transport *sctp_addrs_lookup_transport( struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, int dif, int sdif); struct sctp_transport *sctp_epaddr_lookup_transport( const struct sctp_endpoint *ep, const union sctp_addr *paddr); bool sctp_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif); /* * sctp/proc.c */ int __net_init sctp_proc_init(struct net *net); /* * sctp/offload.c */ int sctp_offload_init(void); /* * sctp/stream_sched.c */ void sctp_sched_ops_init(void); /* * sctp/stream.c */ int sctp_send_reset_streams(struct sctp_association *asoc, struct sctp_reset_streams *params); int sctp_send_reset_assoc(struct sctp_association *asoc); int sctp_send_add_streams(struct sctp_association *asoc, struct sctp_add_streams *params); /* * Module global variables */ /* * sctp/protocol.c */ extern struct kmem_cache *sctp_chunk_cachep __read_mostly; extern struct kmem_cache *sctp_bucket_cachep __read_mostly; extern long sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; extern int sysctl_sctp_wmem[3]; /* * Section: Macros, externs, and inlines */ /* SCTP SNMP MIB stats handlers */ #define SCTP_INC_STATS(net, field) SNMP_INC_STATS((net)->sctp.sctp_statistics, field) #define __SCTP_INC_STATS(net, field) __SNMP_INC_STATS((net)->sctp.sctp_statistics, field) #define SCTP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->sctp.sctp_statistics, field) /* sctp mib definitions */ enum { SCTP_MIB_NUM = 0, SCTP_MIB_CURRESTAB, /* CurrEstab */ SCTP_MIB_ACTIVEESTABS, /* ActiveEstabs */ SCTP_MIB_PASSIVEESTABS, /* PassiveEstabs */ SCTP_MIB_ABORTEDS, /* Aborteds */ SCTP_MIB_SHUTDOWNS, /* Shutdowns */ SCTP_MIB_OUTOFBLUES, /* OutOfBlues */ SCTP_MIB_CHECKSUMERRORS, /* ChecksumErrors */ SCTP_MIB_OUTCTRLCHUNKS, /* OutCtrlChunks */ SCTP_MIB_OUTORDERCHUNKS, /* OutOrderChunks */ SCTP_MIB_OUTUNORDERCHUNKS, /* OutUnorderChunks */ SCTP_MIB_INCTRLCHUNKS, /* InCtrlChunks */ SCTP_MIB_INORDERCHUNKS, /* InOrderChunks */ SCTP_MIB_INUNORDERCHUNKS, /* InUnorderChunks */ SCTP_MIB_FRAGUSRMSGS, /* FragUsrMsgs */ SCTP_MIB_REASMUSRMSGS, /* ReasmUsrMsgs */ SCTP_MIB_OUTSCTPPACKS, /* OutSCTPPacks */ SCTP_MIB_INSCTPPACKS, /* InSCTPPacks */ SCTP_MIB_T1_INIT_EXPIREDS, SCTP_MIB_T1_COOKIE_EXPIREDS, SCTP_MIB_T2_SHUTDOWN_EXPIREDS, SCTP_MIB_T3_RTX_EXPIREDS, SCTP_MIB_T4_RTO_EXPIREDS, SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS, SCTP_MIB_DELAY_SACK_EXPIREDS, SCTP_MIB_AUTOCLOSE_EXPIREDS, SCTP_MIB_T1_RETRANSMITS, SCTP_MIB_T3_RETRANSMITS, SCTP_MIB_PMTUD_RETRANSMITS, SCTP_MIB_FAST_RETRANSMITS, SCTP_MIB_IN_PKT_SOFTIRQ, SCTP_MIB_IN_PKT_BACKLOG, SCTP_MIB_IN_PKT_DISCARDS, SCTP_MIB_IN_DATA_CHUNK_DISCARDS, __SCTP_MIB_MAX }; #define SCTP_MIB_MAX __SCTP_MIB_MAX struct 
sctp_mib { unsigned long mibs[SCTP_MIB_MAX]; }; /* helper function to track stats about max rto and related transport */ static inline void sctp_max_rto(struct sctp_association *asoc, struct sctp_transport *trans) { if (asoc->stats.max_obs_rto < (__u64)trans->rto) { asoc->stats.max_obs_rto = trans->rto; memset(&asoc->stats.obs_rto_ipaddr, 0, sizeof(struct sockaddr_storage)); memcpy(&asoc->stats.obs_rto_ipaddr, &trans->ipaddr, trans->af_specific->sockaddr_len); } } /* * Macros for keeping a global reference of object allocations. */ #ifdef CONFIG_SCTP_DBG_OBJCNT extern atomic_t sctp_dbg_objcnt_sock; extern atomic_t sctp_dbg_objcnt_ep; extern atomic_t sctp_dbg_objcnt_assoc; extern atomic_t sctp_dbg_objcnt_transport; extern atomic_t sctp_dbg_objcnt_chunk; extern atomic_t sctp_dbg_objcnt_bind_addr; extern atomic_t sctp_dbg_objcnt_bind_bucket; extern atomic_t sctp_dbg_objcnt_addr; extern atomic_t sctp_dbg_objcnt_datamsg; extern atomic_t sctp_dbg_objcnt_keys; /* Macros to atomically increment/decrement objcnt counters. */ #define SCTP_DBG_OBJCNT_INC(name) \ atomic_inc(&sctp_dbg_objcnt_## name) #define SCTP_DBG_OBJCNT_DEC(name) \ atomic_dec(&sctp_dbg_objcnt_## name) #define SCTP_DBG_OBJCNT(name) \ atomic_t sctp_dbg_objcnt_## name = ATOMIC_INIT(0) /* Macro to help create new entries in the global array of * objcnt counters. */ #define SCTP_DBG_OBJCNT_ENTRY(name) \ {.label= #name, .counter= &sctp_dbg_objcnt_## name} void sctp_dbg_objcnt_init(struct net *); #else #define SCTP_DBG_OBJCNT_INC(name) #define SCTP_DBG_OBJCNT_DEC(name) static inline void sctp_dbg_objcnt_init(struct net *net) { return; } #endif /* CONFIG_SCTP_DBG_OBJCOUNT */ #if defined CONFIG_SYSCTL void sctp_sysctl_register(void); void sctp_sysctl_unregister(void); int sctp_sysctl_net_register(struct net *net); void sctp_sysctl_net_unregister(struct net *net); #else static inline void sctp_sysctl_register(void) { return; } static inline void sctp_sysctl_unregister(void) { return; } static inline int sctp_sysctl_net_register(struct net *net) { return 0; } static inline void sctp_sysctl_net_unregister(struct net *net) { return; } #endif /* Size of Supported Address Parameter for 'x' address types. */ #define SCTP_SAT_LEN(x) (sizeof(struct sctp_paramhdr) + (x) * sizeof(__u16)) #if IS_ENABLED(CONFIG_IPV6) void sctp_v6_pf_init(void); void sctp_v6_pf_exit(void); int sctp_v6_protosw_init(void); void sctp_v6_protosw_exit(void); int sctp_v6_add_protocol(void); void sctp_v6_del_protocol(void); #else /* #ifdef defined(CONFIG_IPV6) */ static inline void sctp_v6_pf_init(void) { return; } static inline void sctp_v6_pf_exit(void) { return; } static inline int sctp_v6_protosw_init(void) { return 0; } static inline void sctp_v6_protosw_exit(void) { return; } static inline int sctp_v6_add_protocol(void) { return 0; } static inline void sctp_v6_del_protocol(void) { return; } #endif /* #if defined(CONFIG_IPV6) */ /* Map an association to an assoc_id. */ static inline sctp_assoc_t sctp_assoc2id(const struct sctp_association *asoc) { return asoc ? asoc->assoc_id : 0; } static inline enum sctp_sstat_state sctp_assoc_to_state(const struct sctp_association *asoc) { /* SCTP's uapi always had SCTP_EMPTY(=0) as a dummy state, but we * got rid of it in kernel space. Therefore SCTP_CLOSED et al * start at =1 in user space, but actually as =0 in kernel space. * Now that we can not break user space and SCTP_EMPTY is exposed * there, we need to fix it up with an ugly offset not to break * applications. 
:( */ return asoc->state + 1; } /* Look up the association by its id. */ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id); /* A macro to walk a list of skbs. */ #define sctp_skb_for_each(pos, head, tmp) \ skb_queue_walk_safe(head, pos, tmp) /** * sctp_list_dequeue - remove from the head of the queue * @list: list to dequeue from * * Remove the head of the list. The head item is * returned or %NULL if the list is empty. */ static inline struct list_head *sctp_list_dequeue(struct list_head *list) { struct list_head *result = NULL; if (!list_empty(list)) { result = list->next; list_del_init(result); } return result; } /* SCTP version of skb_set_owner_r. We need this one because * of the way we have to do receive buffer accounting on bundled * chunks. */ static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) { struct sctp_ulpevent *event = sctp_skb2event(skb); skb_orphan(skb); skb->sk = sk; skb->destructor = sctp_sock_rfree; atomic_add(event->rmem_len, &sk->sk_rmem_alloc); /* * This mimics the behavior of skb_set_owner_r */ sk_mem_charge(sk, event->rmem_len); } /* Tests if the list has one and only one entry. */ static inline int sctp_list_single_entry(struct list_head *head) { return list_is_singular(head); } static inline bool sctp_chunk_pending(const struct sctp_chunk *chunk) { return !list_empty(&chunk->list); } /* Walk through a list of TLV parameters. Don't trust the * individual parameter lengths and instead depend on * the chunk length to indicate when to stop. Make sure * there is room for a param header too. */ #define sctp_walk_params(pos, chunk)\ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length)) #define _sctp_walk_params(pos, chunk, end)\ for (pos.v = (u8 *)(chunk + 1);\ (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <=\ (void *)chunk + end) &&\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ ntohs(pos.p->length) >= sizeof(struct sctp_paramhdr);\ pos.v += SCTP_PAD4(ntohs(pos.p->length))) #define sctp_walk_errors(err, chunk_hdr)\ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) #define _sctp_walk_errors(err, chunk_hdr, end)\ for (err = (struct sctp_errhdr *)((void *)chunk_hdr + \ sizeof(struct sctp_chunkhdr));\ ((void *)err + offsetof(struct sctp_errhdr, length) + sizeof(err->length) <=\ (void *)chunk_hdr + end) &&\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(struct sctp_errhdr); \ err = (struct sctp_errhdr *)((void *)err + SCTP_PAD4(ntohs(err->length)))) #define sctp_walk_fwdtsn(pos, chunk)\ _sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk)) #define _sctp_walk_fwdtsn(pos, chunk, end)\ for (pos = (void *)(chunk->subh.fwdtsn_hdr + 1);\ (void *)pos <= (void *)(chunk->subh.fwdtsn_hdr + 1) + end - sizeof(struct sctp_fwdtsn_skip);\ pos++) /* External references. */ extern struct proto sctp_prot; extern struct proto sctpv6_prot; void sctp_put_port(struct sock *sk); extern struct idr sctp_assocs_id; extern spinlock_t sctp_assocs_id_lock; /* Static inline functions. */ /* Convert from an IP version number to an Address Family symbol. */ static inline int ipver2af(__u8 ipver) { switch (ipver) { case 4: return AF_INET; case 6: return AF_INET6; default: return 0; } } /* Convert from an address parameter type to an address family. 
*/ static inline int param_type2af(__be16 type) { switch (type) { case SCTP_PARAM_IPV4_ADDRESS: return AF_INET; case SCTP_PARAM_IPV6_ADDRESS: return AF_INET6; default: return 0; } } /* Warning: The following hash functions assume a power of two 'size'. */ /* This is the hash function for the SCTP port hash table. */ static inline int sctp_phashfn(struct net *net, __u16 lport) { return (net_hash_mix(net) + lport) & (sctp_port_hashsize - 1); } /* This is the hash function for the endpoint hash table. */ static inline int sctp_ep_hashfn(struct net *net, __u16 lport) { return (net_hash_mix(net) + lport) & (sctp_ep_hashsize - 1); } #define sctp_for_each_hentry(ep, head) \ hlist_for_each_entry(ep, head, node) /* Is a socket of this style? */ #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style)) static inline int __sctp_style(const struct sock *sk, enum sctp_socket_type style) { return sctp_sk(sk)->type == style; } /* Is the association in this state? */ #define sctp_state(asoc, state) __sctp_state((asoc), (SCTP_STATE_##state)) static inline int __sctp_state(const struct sctp_association *asoc, enum sctp_state state) { return asoc->state == state; } /* Is the socket in this state? */ #define sctp_sstate(sk, state) __sctp_sstate((sk), (SCTP_SS_##state)) static inline int __sctp_sstate(const struct sock *sk, enum sctp_sock_state state) { return sk->sk_state == state; } /* Map v4-mapped v6 address back to v4 address */ static inline void sctp_v6_map_v4(union sctp_addr *addr) { addr->v4.sin_family = AF_INET; addr->v4.sin_port = addr->v6.sin6_port; addr->v4.sin_addr.s_addr = addr->v6.sin6_addr.s6_addr32[3]; } /* Map v4 address to v4-mapped v6 address */ static inline void sctp_v4_map_v6(union sctp_addr *addr) { __be16 port; port = addr->v4.sin_port; addr->v6.sin6_addr.s6_addr32[3] = addr->v4.sin_addr.s_addr; addr->v6.sin6_port = port; addr->v6.sin6_family = AF_INET6; addr->v6.sin6_flowinfo = 0; addr->v6.sin6_scope_id = 0; addr->v6.sin6_addr.s6_addr32[0] = 0; addr->v6.sin6_addr.s6_addr32[1] = 0; addr->v6.sin6_addr.s6_addr32[2] = htonl(0x0000ffff); } /* The cookie is always 0 since this is how it's used in the * pmtu code. */ static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport *t) { if (t->dst && !dst_check(t->dst, t->dst_cookie)) sctp_transport_dst_release(t); return t->dst; } /* Calculate max payload size given a MTU, or the total overhead if * given MTU is zero */ static inline __u32 __sctp_mtu_payload(const struct sctp_sock *sp, const struct sctp_transport *t, __u32 mtu, __u32 extra) { __u32 overhead = sizeof(struct sctphdr) + extra; if (sp) { overhead += sp->pf->af->net_header_len; if (sp->udp_port && (!t || t->encap_port)) overhead += sizeof(struct udphdr); } else { overhead += sizeof(struct ipv6hdr); } if (WARN_ON_ONCE(mtu && mtu <= overhead)) mtu = overhead; return mtu ? 
mtu - overhead : overhead; } static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp, __u32 mtu, __u32 extra) { return __sctp_mtu_payload(sp, NULL, mtu, extra); } static inline __u32 sctp_dst_mtu(const struct dst_entry *dst) { return SCTP_TRUNC4(max_t(__u32, dst_mtu(dst), SCTP_DEFAULT_MINSEGMENT)); } static inline bool sctp_transport_pmtu_check(struct sctp_transport *t) { __u32 pmtu = sctp_dst_mtu(t->dst); if (t->pathmtu == pmtu) return true; t->pathmtu = pmtu; return false; } static inline __u32 sctp_min_frag_point(struct sctp_sock *sp, __u16 datasize) { return sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT, datasize); } static inline int sctp_transport_pl_hlen(struct sctp_transport *t) { return __sctp_mtu_payload(sctp_sk(t->asoc->base.sk), t, 0, 0) - sizeof(struct sctphdr); } static inline void sctp_transport_pl_reset(struct sctp_transport *t) { if (t->probe_interval && (t->param_flags & SPP_PMTUD_ENABLE) && (t->state == SCTP_ACTIVE || t->state == SCTP_UNKNOWN)) { if (t->pl.state == SCTP_PL_DISABLED) { t->pl.state = SCTP_PL_BASE; t->pl.pmtu = SCTP_BASE_PLPMTU; t->pl.probe_size = SCTP_BASE_PLPMTU; sctp_transport_reset_probe_timer(t); } } else { if (t->pl.state != SCTP_PL_DISABLED) { if (timer_delete(&t->probe_timer)) sctp_transport_put(t); t->pl.state = SCTP_PL_DISABLED; } } } static inline void sctp_transport_pl_update(struct sctp_transport *t) { if (t->pl.state == SCTP_PL_DISABLED) return; t->pl.state = SCTP_PL_BASE; t->pl.pmtu = SCTP_BASE_PLPMTU; t->pl.probe_size = SCTP_BASE_PLPMTU; sctp_transport_reset_probe_timer(t); } static inline bool sctp_transport_pl_enabled(struct sctp_transport *t) { return t->pl.state != SCTP_PL_DISABLED; } static inline bool sctp_newsk_ready(const struct sock *sk) { return sock_flag(sk, SOCK_DEAD) || sk->sk_socket; } static inline void sctp_sock_set_nodelay(struct sock *sk) { lock_sock(sk); sctp_sk(sk)->nodelay = true; release_sock(sk); } #endif /* __net_sctp_h__ */
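Illustration only: the overhead arithmetic behind __sctp_mtu_payload() above, worked for plain IPv4. The 20/12/8 byte sizes are the usual IPv4 header, SCTP common header and UDP header lengths; sctp_payload_for_mtu() is a hypothetical helper that ignores the sock/transport plumbing of the real inline function.

#include <stdio.h>

#define IPV4_HDR_LEN	20	/* sp->pf->af->net_header_len for IPv4 */
#define SCTP_HDR_LEN	12	/* sizeof(struct sctphdr) */
#define UDP_HDR_LEN	8	/* added only when UDP encapsulation is used */

static unsigned int sctp_payload_for_mtu(unsigned int mtu, int udp_encap)
{
	unsigned int overhead = IPV4_HDR_LEN + SCTP_HDR_LEN +
				(udp_encap ? UDP_HDR_LEN : 0);

	/* as in the kernel helper, a zero or too-small MTU yields the overhead */
	return mtu > overhead ? mtu - overhead : overhead;
}

int main(void)
{
	/* 1500 - (20 + 12) = 1468 bytes left for chunks */
	printf("%u\n", sctp_payload_for_mtu(1500, 0));
	/* with UDP encapsulation: 1500 - (20 + 12 + 8) = 1460 */
	printf("%u\n", sctp_payload_for_mtu(1500, 1));
	return 0;
}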
//
SPDX-License-Identifier: GPL-2.0-or-later /* * Sunplus spca561 subdriver * * Copyright (C) 2004 Michel Xhaard mxhaard@magic.fr * * V4L2 by Jean-Francois Moine <http://moinejf.free.fr> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "spca561" #include <linux/input.h> #include "gspca.h" MODULE_AUTHOR("Michel Xhaard <mxhaard@users.sourceforge.net>"); MODULE_DESCRIPTION("GSPCA/SPCA561 USB Camera Driver"); MODULE_LICENSE("GPL"); #define EXPOSURE_MAX (2047 + 325) /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ struct { /* hue/contrast control cluster */ struct v4l2_ctrl *contrast; struct v4l2_ctrl *hue; }; struct v4l2_ctrl *autogain; #define EXPO12A_DEF 3 __u8 expo12a; /* expo/gain? for rev 12a */ __u8 chip_revision; #define Rev012A 0 #define Rev072A 1 signed char ag_cnt; #define AG_CNT_START 13 }; static const struct v4l2_pix_format sif_012a_mode[] = { {160, 120, V4L2_PIX_FMT_SGBRG8, V4L2_FIELD_NONE, .bytesperline = 160, .sizeimage = 160 * 120, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 3}, {176, 144, V4L2_PIX_FMT_SGBRG8, V4L2_FIELD_NONE, .bytesperline = 176, .sizeimage = 176 * 144, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 2}, {320, 240, V4L2_PIX_FMT_SPCA561, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240 * 4 / 8, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 1}, {352, 288, V4L2_PIX_FMT_SPCA561, V4L2_FIELD_NONE, .bytesperline = 352, .sizeimage = 352 * 288 * 4 / 8, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; static const struct v4l2_pix_format sif_072a_mode[] = { {160, 120, V4L2_PIX_FMT_SGBRG8, V4L2_FIELD_NONE, .bytesperline = 160, .sizeimage = 160 * 120, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 3}, {176, 144, V4L2_PIX_FMT_SGBRG8, V4L2_FIELD_NONE, .bytesperline = 176, .sizeimage = 176 * 144, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 2}, {320, 240, V4L2_PIX_FMT_SGBRG8, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 1}, {352, 288, V4L2_PIX_FMT_SGBRG8, V4L2_FIELD_NONE, .bytesperline = 352, .sizeimage = 352 * 288, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; /* * Initialization data * I'm not very sure how to split initialization from open data * chunks. 
For now, we'll consider everything as initialization */ /* Frame packet header offsets for the spca561 */ #define SPCA561_OFFSET_SNAP 1 #define SPCA561_OFFSET_TYPE 2 #define SPCA561_OFFSET_COMPRESS 3 #define SPCA561_OFFSET_FRAMSEQ 4 #define SPCA561_OFFSET_GPIO 5 #define SPCA561_OFFSET_USBBUFF 6 #define SPCA561_OFFSET_WIN2GRAVE 7 #define SPCA561_OFFSET_WIN2RAVE 8 #define SPCA561_OFFSET_WIN2BAVE 9 #define SPCA561_OFFSET_WIN2GBAVE 10 #define SPCA561_OFFSET_WIN1GRAVE 11 #define SPCA561_OFFSET_WIN1RAVE 12 #define SPCA561_OFFSET_WIN1BAVE 13 #define SPCA561_OFFSET_WIN1GBAVE 14 #define SPCA561_OFFSET_FREQ 15 #define SPCA561_OFFSET_VSYNC 16 #define SPCA561_INDEX_I2C_BASE 0x8800 #define SPCA561_SNAPBIT 0x20 #define SPCA561_SNAPCTRL 0x40 static const u16 rev72a_reset[][2] = { {0x0000, 0x8114}, /* Software GPIO output data */ {0x0001, 0x8114}, /* Software GPIO output data */ {0x0000, 0x8112}, /* Some kind of reset */ {} }; static const __u16 rev72a_init_data1[][2] = { {0x0003, 0x8701}, /* PCLK clock delay adjustment */ {0x0001, 0x8703}, /* HSYNC from cmos inverted */ {0x0011, 0x8118}, /* Enable and conf sensor */ {0x0001, 0x8118}, /* Conf sensor */ {0x0092, 0x8804}, /* I know nothing about these */ {0x0010, 0x8802}, /* 0x88xx registers, so I won't */ {} }; static const u16 rev72a_init_sensor1[][2] = { {0x0001, 0x000d}, {0x0002, 0x0018}, {0x0004, 0x0165}, {0x0005, 0x0021}, {0x0007, 0x00aa}, {0x0020, 0x1504}, {0x0039, 0x0002}, {0x0035, 0x0010}, {0x0009, 0x1049}, {0x0028, 0x000b}, {0x003b, 0x000f}, {0x003c, 0x0000}, {} }; static const __u16 rev72a_init_data2[][2] = { {0x0018, 0x8601}, /* Pixel/line selection for color separation */ {0x0000, 0x8602}, /* Optical black level for user setting */ {0x0060, 0x8604}, /* Optical black horizontal offset */ {0x0002, 0x8605}, /* Optical black vertical offset */ {0x0000, 0x8603}, /* Non-automatic optical black level */ {0x0002, 0x865b}, /* Horizontal offset for valid pixels */ {0x0000, 0x865f}, /* Vertical valid pixels window (x2) */ {0x00b0, 0x865d}, /* Horizontal valid pixels window (x2) */ {0x0090, 0x865e}, /* Vertical valid lines window (x2) */ {0x00e0, 0x8406}, /* Memory buffer threshold */ {0x0000, 0x8660}, /* Compensation memory stuff */ {0x0002, 0x8201}, /* Output address for r/w serial EEPROM */ {0x0008, 0x8200}, /* Clear valid bit for serial EEPROM */ {0x0001, 0x8200}, /* OprMode to be executed by hardware */ /* from ms-win */ {0x0000, 0x8611}, /* R offset for white balance */ {0x00fd, 0x8612}, /* Gr offset for white balance */ {0x0003, 0x8613}, /* B offset for white balance */ {0x0000, 0x8614}, /* Gb offset for white balance */ /* from ms-win */ {0x0035, 0x8651}, /* R gain for white balance */ {0x0040, 0x8652}, /* Gr gain for white balance */ {0x005f, 0x8653}, /* B gain for white balance */ {0x0040, 0x8654}, /* Gb gain for white balance */ {0x0002, 0x8502}, /* Maximum average bit rate stuff */ {0x0011, 0x8802}, {0x0087, 0x8700}, /* Set master clock (96Mhz????) 
*/ {0x0081, 0x8702}, /* Master clock output enable */ {0x0000, 0x8500}, /* Set image type (352x288 no compression) */ /* Originally was 0x0010 (352x288 compression) */ {0x0002, 0x865b}, /* Horizontal offset for valid pixels */ {0x0003, 0x865c}, /* Vertical offset for valid lines */ {} }; static const u16 rev72a_init_sensor2[][2] = { {0x0003, 0x0121}, {0x0004, 0x0165}, {0x0005, 0x002f}, /* blanking control column */ {0x0006, 0x0000}, /* blanking mode row*/ {0x000a, 0x0002}, {0x0009, 0x1061}, /* setexposure times && pixel clock * 0001 0 | 000 0110 0001 */ {0x0035, 0x0014}, {} }; /******************** QC Express etch2 stuff ********************/ static const __u16 Pb100_1map8300[][2] = { /* reg, value */ {0x8320, 0x3304}, {0x8303, 0x0125}, /* image area */ {0x8304, 0x0169}, {0x8328, 0x000b}, {0x833c, 0x0001}, /*fixme: win:07*/ {0x832f, 0x1904}, /*fixme: was 0419*/ {0x8307, 0x00aa}, {0x8301, 0x0003}, {0x8302, 0x000e}, {} }; static const __u16 Pb100_2map8300[][2] = { /* reg, value */ {0x8339, 0x0000}, {0x8307, 0x00aa}, {} }; static const __u16 spca561_161rev12A_data1[][2] = { {0x29, 0x8118}, /* Control register (various enable bits) */ {0x08, 0x8114}, /* GPIO: Led off */ {0x0e, 0x8112}, /* 0x0e stream off 0x3e stream on */ {0x00, 0x8102}, /* white balance - new */ {0x92, 0x8804}, {0x04, 0x8802}, /* windows uses 08 */ {} }; static const __u16 spca561_161rev12A_data2[][2] = { {0x21, 0x8118}, {0x10, 0x8500}, {0x07, 0x8601}, {0x07, 0x8602}, {0x04, 0x8501}, {0x07, 0x8201}, /* windows uses 02 */ {0x08, 0x8200}, {0x01, 0x8200}, {0x90, 0x8604}, {0x00, 0x8605}, {0xb0, 0x8603}, /* sensor gains */ {0x07, 0x8601}, /* white balance - new */ {0x07, 0x8602}, /* white balance - new */ {0x00, 0x8610}, /* *red */ {0x00, 0x8611}, /* 3f *green */ {0x00, 0x8612}, /* green *blue */ {0x00, 0x8613}, /* blue *green */ {0x43, 0x8614}, /* green *red - white balance - was 0x35 */ {0x40, 0x8615}, /* 40 *green - white balance - was 0x35 */ {0x71, 0x8616}, /* 7a *blue - white balance - was 0x35 */ {0x40, 0x8617}, /* 40 *green - white balance - was 0x35 */ {0x0c, 0x8620}, /* 0c */ {0xc8, 0x8631}, /* c8 */ {0xc8, 0x8634}, /* c8 */ {0x23, 0x8635}, /* 23 */ {0x1f, 0x8636}, /* 1f */ {0xdd, 0x8637}, /* dd */ {0xe1, 0x8638}, /* e1 */ {0x1d, 0x8639}, /* 1d */ {0x21, 0x863a}, /* 21 */ {0xe3, 0x863b}, /* e3 */ {0xdf, 0x863c}, /* df */ {0xf0, 0x8505}, {0x32, 0x850a}, /* {0x99, 0x8700}, * - white balance - new (removed) */ /* HDG we used to do this in stop0, making the init state and the state after a start / stop different, so do this here instead. 
*/ {0x29, 0x8118}, {} }; static void reg_w_val(struct gspca_dev *gspca_dev, __u16 index, __u8 value) { int ret; struct usb_device *dev = gspca_dev->dev; ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0, /* request */ USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, NULL, 0, 500); gspca_dbg(gspca_dev, D_USBO, "reg write: 0x%02x:0x%02x\n", index, value); if (ret < 0) pr_err("reg write: error %d\n", ret); } static void write_vector(struct gspca_dev *gspca_dev, const __u16 data[][2]) { int i; i = 0; while (data[i][1] != 0) { reg_w_val(gspca_dev, data[i][1], data[i][0]); i++; } } /* read 'len' bytes to gspca_dev->usb_buf */ static void reg_r(struct gspca_dev *gspca_dev, __u16 index, __u16 length) { usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), 0, /* request */ USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, /* value */ index, gspca_dev->usb_buf, length, 500); } /* write 'len' bytes from gspca_dev->usb_buf */ static void reg_w_buf(struct gspca_dev *gspca_dev, __u16 index, __u16 len) { usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), 0, /* request */ USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, /* value */ index, gspca_dev->usb_buf, len, 500); } static void i2c_write(struct gspca_dev *gspca_dev, __u16 value, __u16 reg) { int retry = 60; reg_w_val(gspca_dev, 0x8801, reg); reg_w_val(gspca_dev, 0x8805, value); reg_w_val(gspca_dev, 0x8800, value >> 8); do { reg_r(gspca_dev, 0x8803, 1); if (!gspca_dev->usb_buf[0]) return; msleep(10); } while (--retry); } static int i2c_read(struct gspca_dev *gspca_dev, __u16 reg, __u8 mode) { int retry = 60; __u8 value; reg_w_val(gspca_dev, 0x8804, 0x92); reg_w_val(gspca_dev, 0x8801, reg); reg_w_val(gspca_dev, 0x8802, mode | 0x01); do { reg_r(gspca_dev, 0x8803, 1); if (!gspca_dev->usb_buf[0]) { reg_r(gspca_dev, 0x8800, 1); value = gspca_dev->usb_buf[0]; reg_r(gspca_dev, 0x8805, 1); return ((int) value << 8) | gspca_dev->usb_buf[0]; } msleep(10); } while (--retry); return -1; } static void sensor_mapwrite(struct gspca_dev *gspca_dev, const __u16 (*sensormap)[2]) { while ((*sensormap)[0]) { gspca_dev->usb_buf[0] = (*sensormap)[1]; gspca_dev->usb_buf[1] = (*sensormap)[1] >> 8; reg_w_buf(gspca_dev, (*sensormap)[0], 2); sensormap++; } } static void write_sensor_72a(struct gspca_dev *gspca_dev, const __u16 (*sensor)[2]) { while ((*sensor)[0]) { i2c_write(gspca_dev, (*sensor)[1], (*sensor)[0]); sensor++; } } static void init_161rev12A(struct gspca_dev *gspca_dev) { write_vector(gspca_dev, spca561_161rev12A_data1); sensor_mapwrite(gspca_dev, Pb100_1map8300); /*fixme: should be in sd_start*/ write_vector(gspca_dev, spca561_161rev12A_data2); sensor_mapwrite(gspca_dev, Pb100_2map8300); } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; __u16 vendor, product; __u8 data1, data2; /* Read frm global register the USB product and vendor IDs, just to * prove that we can communicate with the device. This works, which * confirms at we are communicating properly and that the device * is a 561. 
*/ reg_r(gspca_dev, 0x8104, 1); data1 = gspca_dev->usb_buf[0]; reg_r(gspca_dev, 0x8105, 1); data2 = gspca_dev->usb_buf[0]; vendor = (data2 << 8) | data1; reg_r(gspca_dev, 0x8106, 1); data1 = gspca_dev->usb_buf[0]; reg_r(gspca_dev, 0x8107, 1); data2 = gspca_dev->usb_buf[0]; product = (data2 << 8) | data1; if (vendor != id->idVendor || product != id->idProduct) { gspca_dbg(gspca_dev, D_PROBE, "Bad vendor / product from device\n"); return -EINVAL; } cam = &gspca_dev->cam; cam->needs_full_bandwidth = 1; sd->chip_revision = id->driver_info; if (sd->chip_revision == Rev012A) { cam->cam_mode = sif_012a_mode; cam->nmodes = ARRAY_SIZE(sif_012a_mode); } else { cam->cam_mode = sif_072a_mode; cam->nmodes = ARRAY_SIZE(sif_072a_mode); } sd->expo12a = EXPO12A_DEF; return 0; } /* this function is called at probe and resume time */ static int sd_init_12a(struct gspca_dev *gspca_dev) { gspca_dbg(gspca_dev, D_STREAM, "Chip revision: 012a\n"); init_161rev12A(gspca_dev); return 0; } static int sd_init_72a(struct gspca_dev *gspca_dev) { gspca_dbg(gspca_dev, D_STREAM, "Chip revision: 072a\n"); write_vector(gspca_dev, rev72a_reset); msleep(200); write_vector(gspca_dev, rev72a_init_data1); write_sensor_72a(gspca_dev, rev72a_init_sensor1); write_vector(gspca_dev, rev72a_init_data2); write_sensor_72a(gspca_dev, rev72a_init_sensor2); reg_w_val(gspca_dev, 0x8112, 0x30); return 0; } static void setbrightness(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; __u16 reg; if (sd->chip_revision == Rev012A) reg = 0x8610; else reg = 0x8611; reg_w_val(gspca_dev, reg + 0, val); /* R */ reg_w_val(gspca_dev, reg + 1, val); /* Gr */ reg_w_val(gspca_dev, reg + 2, val); /* B */ reg_w_val(gspca_dev, reg + 3, val); /* Gb */ } static void setwhite(struct gspca_dev *gspca_dev, s32 white, s32 contrast) { struct sd *sd = (struct sd *) gspca_dev; __u8 blue, red; __u16 reg; /* try to emulate MS-win as possible */ red = 0x20 + white * 3 / 8; blue = 0x90 - white * 5 / 8; if (sd->chip_revision == Rev012A) { reg = 0x8614; } else { reg = 0x8651; red += contrast - 0x20; blue += contrast - 0x20; reg_w_val(gspca_dev, 0x8652, contrast + 0x20); /* Gr */ reg_w_val(gspca_dev, 0x8654, contrast + 0x20); /* Gb */ } reg_w_val(gspca_dev, reg, red); reg_w_val(gspca_dev, reg + 2, blue); } /* rev 12a only */ static void setexposure(struct gspca_dev *gspca_dev, s32 val) { int i, expo = 0; /* Register 0x8309 controls exposure for the spca561, the basic exposure setting goes from 1-2047, where 1 is completely dark and 2047 is very bright. It not only influences exposure but also the framerate (to allow for longer exposure) from 1 - 300 it only raises the exposure time then from 300 - 600 it halves the framerate to be able to further raise the exposure time and for every 300 more it halves the framerate again. This allows for a maximum exposure time of circa 0.2 - 0.25 seconds (30 / (2000/3000) fps). Sometimes this is not enough, the 1-2047 uses bits 0-10, bits 11-12 configure a divider for the base framerate which us used at the exposure setting of 1-300. 
These bits configure the base framerate according to the following formula: fps = 60 / (value + 2) */ /* We choose to use the high bits setting the fixed framerate divisor asap, as setting high basic exposure setting without the fixed divider in combination with high gains makes the cam stop */ static const int table[] = { 0, 450, 550, 625, EXPOSURE_MAX }; for (i = 0; i < ARRAY_SIZE(table) - 1; i++) { if (val <= table[i + 1]) { expo = val - table[i]; if (i) expo += 300; expo |= i << 11; break; } } gspca_dev->usb_buf[0] = expo; gspca_dev->usb_buf[1] = expo >> 8; reg_w_buf(gspca_dev, 0x8309, 2); } /* rev 12a only */ static void setgain(struct gspca_dev *gspca_dev, s32 val) { /* gain reg low 6 bits 0-63 gain, bit 6 and 7, both double the sensitivity when set, so 31 + one of them set == 63, and 15 with both of them set == 63 */ if (val < 64) gspca_dev->usb_buf[0] = val; else if (val < 128) gspca_dev->usb_buf[0] = (val / 2) | 0x40; else gspca_dev->usb_buf[0] = (val / 4) | 0xc0; gspca_dev->usb_buf[1] = 0; reg_w_buf(gspca_dev, 0x8335, 2); } static void setautogain(struct gspca_dev *gspca_dev, s32 val) { struct sd *sd = (struct sd *) gspca_dev; if (val) sd->ag_cnt = AG_CNT_START; else sd->ag_cnt = -1; } static int sd_start_12a(struct gspca_dev *gspca_dev) { int mode; static const __u8 Reg8391[8] = {0x92, 0x30, 0x20, 0x00, 0x0c, 0x00, 0x00, 0x00}; mode = gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv; if (mode <= 1) { /* Use compression on 320x240 and above */ reg_w_val(gspca_dev, 0x8500, 0x10 | mode); } else { /* I couldn't get the compression to work below 320x240 * Fortunately at these resolutions the bandwidth * is sufficient to push raw frames at ~20fps */ reg_w_val(gspca_dev, 0x8500, mode); } /* -- qq@kuku.eu.org */ gspca_dev->usb_buf[0] = 0xaa; gspca_dev->usb_buf[1] = 0x00; reg_w_buf(gspca_dev, 0x8307, 2); /* clock - lower 0x8X values lead to fps > 30 */ reg_w_val(gspca_dev, 0x8700, 0x8a); /* 0x8f 0x85 0x27 clock */ reg_w_val(gspca_dev, 0x8112, 0x1e | 0x20); reg_w_val(gspca_dev, 0x850b, 0x03); memcpy(gspca_dev->usb_buf, Reg8391, 8); reg_w_buf(gspca_dev, 0x8391, 8); reg_w_buf(gspca_dev, 0x8390, 8); /* Led ON (bit 3 -> 0 */ reg_w_val(gspca_dev, 0x8114, 0x00); return 0; } static int sd_start_72a(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int Clck; int mode; write_vector(gspca_dev, rev72a_reset); msleep(200); write_vector(gspca_dev, rev72a_init_data1); write_sensor_72a(gspca_dev, rev72a_init_sensor1); mode = gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv; switch (mode) { default: case 0: Clck = 0x27; /* ms-win 0x87 */ break; case 1: Clck = 0x25; break; case 2: Clck = 0x22; break; case 3: Clck = 0x21; break; } reg_w_val(gspca_dev, 0x8700, Clck); /* 0x27 clock */ reg_w_val(gspca_dev, 0x8702, 0x81); reg_w_val(gspca_dev, 0x8500, mode); /* mode */ write_sensor_72a(gspca_dev, rev72a_init_sensor2); setwhite(gspca_dev, v4l2_ctrl_g_ctrl(sd->hue), v4l2_ctrl_g_ctrl(sd->contrast)); /* setbrightness(gspca_dev); * fixme: bad values */ setautogain(gspca_dev, v4l2_ctrl_g_ctrl(sd->autogain)); reg_w_val(gspca_dev, 0x8112, 0x10 | 0x20); return 0; } static void sd_stopN(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; if (sd->chip_revision == Rev012A) { reg_w_val(gspca_dev, 0x8112, 0x0e); /* Led Off (bit 3 -> 1 */ reg_w_val(gspca_dev, 0x8114, 0x08); } else { reg_w_val(gspca_dev, 0x8112, 0x20); /* reg_w_val(gspca_dev, 0x8102, 0x00); ?? 
*/ } } static void do_autogain(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int expotimes; int pixelclk; int gainG; __u8 R, Gr, Gb, B; int y; __u8 luma_mean = 110; __u8 luma_delta = 20; __u8 spring = 4; if (sd->ag_cnt < 0) return; if (--sd->ag_cnt >= 0) return; sd->ag_cnt = AG_CNT_START; switch (sd->chip_revision) { case Rev072A: reg_r(gspca_dev, 0x8621, 1); Gr = gspca_dev->usb_buf[0]; reg_r(gspca_dev, 0x8622, 1); R = gspca_dev->usb_buf[0]; reg_r(gspca_dev, 0x8623, 1); B = gspca_dev->usb_buf[0]; reg_r(gspca_dev, 0x8624, 1); Gb = gspca_dev->usb_buf[0]; y = (77 * R + 75 * (Gr + Gb) + 29 * B) >> 8; /* u= (128*B-(43*(Gr+Gb+R))) >> 8; */ /* v= (128*R-(53*(Gr+Gb))-21*B) >> 8; */ if (y < luma_mean - luma_delta || y > luma_mean + luma_delta) { expotimes = i2c_read(gspca_dev, 0x09, 0x10); pixelclk = 0x0800; expotimes = expotimes & 0x07ff; gainG = i2c_read(gspca_dev, 0x35, 0x10); expotimes += (luma_mean - y) >> spring; gainG += (luma_mean - y) / 50; if (gainG > 0x3f) gainG = 0x3f; else if (gainG < 3) gainG = 3; i2c_write(gspca_dev, gainG, 0x35); if (expotimes > 0x0256) expotimes = 0x0256; else if (expotimes < 3) expotimes = 3; i2c_write(gspca_dev, expotimes | pixelclk, 0x09); } break; } } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { struct sd *sd = (struct sd *) gspca_dev; len--; switch (*data++) { /* sequence number */ case 0: /* start of frame */ gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); /* This should never happen */ if (len < 2) { gspca_err(gspca_dev, "Short SOF packet, ignoring\n"); gspca_dev->last_packet_type = DISCARD_PACKET; return; } #if IS_ENABLED(CONFIG_INPUT) if (data[0] & 0x20) { input_report_key(gspca_dev->input_dev, KEY_CAMERA, 1); input_sync(gspca_dev->input_dev); input_report_key(gspca_dev->input_dev, KEY_CAMERA, 0); input_sync(gspca_dev->input_dev); } #endif if (data[1] & 0x10) { /* compressed bayer */ gspca_frame_add(gspca_dev, FIRST_PACKET, data, len); } else { /* raw bayer (with a header, which we skip) */ if (sd->chip_revision == Rev012A) { data += 20; len -= 20; } else { data += 16; len -= 16; } gspca_frame_add(gspca_dev, FIRST_PACKET, data, len); } return; case 0xff: /* drop (empty mpackets) */ return; } gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); struct sd *sd = (struct sd *)gspca_dev; gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: setbrightness(gspca_dev, ctrl->val); break; case V4L2_CID_CONTRAST: /* hue/contrast control cluster for 72a */ setwhite(gspca_dev, sd->hue->val, ctrl->val); break; case V4L2_CID_HUE: /* just plain hue control for 12a */ setwhite(gspca_dev, ctrl->val, 0); break; case V4L2_CID_EXPOSURE: setexposure(gspca_dev, ctrl->val); break; case V4L2_CID_GAIN: setgain(gspca_dev, ctrl->val); break; case V4L2_CID_AUTOGAIN: setautogain(gspca_dev, ctrl->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls_12a(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 3); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_HUE, 1, 0x7f, 1, 0x40); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, -128, 127, 1, 0); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops,
V4L2_CID_EXPOSURE, 1, EXPOSURE_MAX, 1, 700); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_GAIN, 0, 255, 1, 63); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } static int sd_init_controls_72a(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *)gspca_dev; struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 4); sd->contrast = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_CONTRAST, 0, 0x3f, 1, 0x20); sd->hue = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_HUE, 1, 0x7f, 1, 0x40); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 0x3f, 1, 0x20); sd->autogain = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_AUTOGAIN, 0, 1, 1, 1); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } v4l2_ctrl_cluster(2, &sd->contrast); return 0; } /* sub-driver description */ static const struct sd_desc sd_desc_12a = { .name = MODULE_NAME, .init_controls = sd_init_controls_12a, .config = sd_config, .init = sd_init_12a, .start = sd_start_12a, .stopN = sd_stopN, .pkt_scan = sd_pkt_scan, #if IS_ENABLED(CONFIG_INPUT) .other_input = 1, #endif }; static const struct sd_desc sd_desc_72a = { .name = MODULE_NAME, .init_controls = sd_init_controls_72a, .config = sd_config, .init = sd_init_72a, .start = sd_start_72a, .stopN = sd_stopN, .pkt_scan = sd_pkt_scan, .dq_callback = do_autogain, #if IS_ENABLED(CONFIG_INPUT) .other_input = 1, #endif }; static const struct sd_desc *sd_desc[2] = { &sd_desc_12a, &sd_desc_72a }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x041e, 0x401a), .driver_info = Rev072A}, {USB_DEVICE(0x041e, 0x403b), .driver_info = Rev012A}, {USB_DEVICE(0x0458, 0x7004), .driver_info = Rev072A}, {USB_DEVICE(0x0461, 0x0815), .driver_info = Rev072A}, {USB_DEVICE(0x046d, 0x0928), .driver_info = Rev012A}, {USB_DEVICE(0x046d, 0x0929), .driver_info = Rev012A}, {USB_DEVICE(0x046d, 0x092a), .driver_info = Rev012A}, {USB_DEVICE(0x046d, 0x092b), .driver_info = Rev012A}, {USB_DEVICE(0x046d, 0x092c), .driver_info = Rev012A}, {USB_DEVICE(0x046d, 0x092d), .driver_info = Rev012A}, {USB_DEVICE(0x046d, 0x092e), .driver_info = Rev012A}, {USB_DEVICE(0x046d, 0x092f), .driver_info = Rev012A}, {USB_DEVICE(0x04fc, 0x0561), .driver_info = Rev072A}, {USB_DEVICE(0x060b, 0xa001), .driver_info = Rev072A}, {USB_DEVICE(0x10fd, 0x7e50), .driver_info = Rev072A}, {USB_DEVICE(0xabcd, 0xcdee), .driver_info = Rev072A}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, sd_desc[id->driver_info], sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
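The setexposure() logic above packs the 12a exposure control into the 16-bit value written to register 0x8309: bits 0-10 carry the basic exposure time and bits 11-12 select a fixed framerate divider (fps = 60 / (divider + 2), per the in-code comment). The following standalone sketch mirrors that table-driven mapping for illustration only; it is not driver code, and the EXPOSURE_MAX value here is an assumed placeholder for the driver's real definition.

#include <stdio.h>

#define EXPOSURE_MAX 2047	/* placeholder for this sketch; the driver defines its own limit */

/* Pack a control value (1..EXPOSURE_MAX) the way setexposure() does:
 * bits 0-10 = basic exposure time, bits 11-12 = framerate divider index. */
static unsigned int pack_exposure(int val)
{
	static const int table[] = { 0, 450, 550, 625, EXPOSURE_MAX };
	unsigned int expo = 0;
	int i;

	for (i = 0; i < (int)(sizeof(table) / sizeof(table[0])) - 1; i++) {
		if (val <= table[i + 1]) {
			expo = val - table[i];		/* basic exposure time */
			if (i)
				expo += 300;		/* upper brackets continue from exposure 300 */
			expo |= i << 11;		/* divider bits: fps = 60 / (i + 2) */
			break;
		}
	}
	return expo;
}

int main(void)
{
	static const int samples[] = { 1, 300, 451, 700 };
	int i;

	for (i = 0; i < 4; i++)
		printf("val=%4d -> reg 0x8309 = 0x%04x\n", samples[i], pack_exposure(samples[i]));
	return 0;
}

Printing the bracket boundaries shows the intent stated in the driver comment: the divider bits are raised as soon as possible, so longer exposures are reached by halving the framerate rather than by pushing the 11-bit exposure field to values that, combined with high gain, can stall the sensor.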
// SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2018 Oracle and/or its affiliates. All rights reserved. */ #include <crypto/aead.h> #include <linux/debugfs.h> #include <net/xfrm.h> #include "netdevsim.h" #define NSIM_IPSEC_AUTH_BITS 128 static ssize_t nsim_dbg_netdev_ops_read(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { struct netdevsim *ns = filp->private_data; struct nsim_ipsec *ipsec = &ns->ipsec; size_t bufsize; char *buf, *p; int len; int i; /* the buffer needed is * (num SAs * 3 lines each * ~60 bytes per line) + one more line */ bufsize = (ipsec->count * 4 * 60) + 60; buf = kzalloc(bufsize, GFP_KERNEL); if (!buf) return -ENOMEM; p = buf; p += scnprintf(p, bufsize - (p - buf), "SA count=%u tx=%u\n", ipsec->count, ipsec->tx); for (i = 0; i < NSIM_IPSEC_MAX_SA_COUNT; i++) { struct nsim_sa *sap = &ipsec->sa[i]; if (!sap->used) continue; if (sap->xs->props.family == AF_INET6) p += scnprintf(p, bufsize - (p - buf), "sa[%i] %cx ipaddr=%pI6c\n", i, (sap->rx ? 'r' : 't'), &sap->ipaddr); else p += scnprintf(p, bufsize - (p - buf), "sa[%i] %cx ipaddr=%pI4\n", i, (sap->rx ?
'r' : 't'), &sap->ipaddr[3]); p += scnprintf(p, bufsize - (p - buf), "sa[%i] spi=0x%08x proto=0x%x salt=0x%08x crypt=%d\n", i, be32_to_cpu(sap->xs->id.spi), sap->xs->id.proto, sap->salt, sap->crypt); p += scnprintf(p, bufsize - (p - buf), "sa[%i] key=0x%08x %08x %08x %08x\n", i, sap->key[0], sap->key[1], sap->key[2], sap->key[3]); } len = simple_read_from_buffer(buffer, count, ppos, buf, p - buf); kfree(buf); return len; } static const struct file_operations ipsec_dbg_fops = { .owner = THIS_MODULE, .open = simple_open, .read = nsim_dbg_netdev_ops_read, }; static int nsim_ipsec_find_empty_idx(struct nsim_ipsec *ipsec) { u32 i; if (ipsec->count == NSIM_IPSEC_MAX_SA_COUNT) return -ENOSPC; /* search sa table */ for (i = 0; i < NSIM_IPSEC_MAX_SA_COUNT; i++) { if (!ipsec->sa[i].used) return i; } return -ENOSPC; } static int nsim_ipsec_parse_proto_keys(struct net_device *dev, struct xfrm_state *xs, u32 *mykey, u32 *mysalt) { const char aes_gcm_name[] = "rfc4106(gcm(aes))"; unsigned char *key_data; char *alg_name = NULL; int key_len; if (!xs->aead) { netdev_err(dev, "Unsupported IPsec algorithm\n"); return -EINVAL; } if (xs->aead->alg_icv_len != NSIM_IPSEC_AUTH_BITS) { netdev_err(dev, "IPsec offload requires %d bit authentication\n", NSIM_IPSEC_AUTH_BITS); return -EINVAL; } key_data = &xs->aead->alg_key[0]; key_len = xs->aead->alg_key_len; alg_name = xs->aead->alg_name; if (strcmp(alg_name, aes_gcm_name)) { netdev_err(dev, "Unsupported IPsec algorithm - please use %s\n", aes_gcm_name); return -EINVAL; } /* 160 accounts for 16 byte key and 4 byte salt */ if (key_len > NSIM_IPSEC_AUTH_BITS) { *mysalt = ((u32 *)key_data)[4]; } else if (key_len == NSIM_IPSEC_AUTH_BITS) { *mysalt = 0; } else { netdev_err(dev, "IPsec hw offload only supports 128 bit keys with optional 32 bit salt\n"); return -EINVAL; } memcpy(mykey, key_data, 16); return 0; } static int nsim_ipsec_add_sa(struct net_device *dev, struct xfrm_state *xs, struct netlink_ext_ack *extack) { struct nsim_ipsec *ipsec; struct netdevsim *ns; struct nsim_sa sa; u16 sa_idx; int ret; ns = netdev_priv(dev); ipsec = &ns->ipsec; if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) { NL_SET_ERR_MSG_MOD(extack, "Unsupported protocol for ipsec offload"); return -EINVAL; } if (xs->calg) { NL_SET_ERR_MSG_MOD(extack, "Compression offload not supported"); return -EINVAL; } if (xs->xso.type != XFRM_DEV_OFFLOAD_CRYPTO) { NL_SET_ERR_MSG_MOD(extack, "Unsupported ipsec offload type"); return -EINVAL; } /* find the first unused index */ ret = nsim_ipsec_find_empty_idx(ipsec); if (ret < 0) { NL_SET_ERR_MSG_MOD(extack, "No space for SA in Rx table!"); return ret; } sa_idx = (u16)ret; memset(&sa, 0, sizeof(sa)); sa.used = true; sa.xs = xs; if (sa.xs->id.proto & IPPROTO_ESP) sa.crypt = xs->ealg || xs->aead; /* get the key and salt */ ret = nsim_ipsec_parse_proto_keys(dev, xs, sa.key, &sa.salt); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Failed to get key data for SA table"); return ret; } if (xs->xso.dir == XFRM_DEV_OFFLOAD_IN) sa.rx = true; if (xs->props.family == AF_INET6) memcpy(sa.ipaddr, &xs->id.daddr.a6, 16); else memcpy(&sa.ipaddr[3], &xs->id.daddr.a4, 4); /* the preparations worked, so save the info */ memcpy(&ipsec->sa[sa_idx], &sa, sizeof(sa)); /* the XFRM stack doesn't like offload_handle == 0, * so add a bitflag in case our array index is 0 */ xs->xso.offload_handle = sa_idx | NSIM_IPSEC_VALID; ipsec->count++; return 0; } static void nsim_ipsec_del_sa(struct net_device *dev, struct xfrm_state *xs) { struct netdevsim *ns = netdev_priv(dev); struct 
nsim_ipsec *ipsec = &ns->ipsec; u16 sa_idx; sa_idx = xs->xso.offload_handle & ~NSIM_IPSEC_VALID; if (!ipsec->sa[sa_idx].used) { netdev_err(ns->netdev, "Invalid SA for delete sa_idx=%d\n", sa_idx); return; } memset(&ipsec->sa[sa_idx], 0, sizeof(struct nsim_sa)); ipsec->count--; } static const struct xfrmdev_ops nsim_xfrmdev_ops = { .xdo_dev_state_add = nsim_ipsec_add_sa, .xdo_dev_state_delete = nsim_ipsec_del_sa, }; bool nsim_ipsec_tx(struct netdevsim *ns, struct sk_buff *skb) { struct sec_path *sp = skb_sec_path(skb); struct nsim_ipsec *ipsec = &ns->ipsec; struct xfrm_state *xs; struct nsim_sa *tsa; u32 sa_idx; /* do we even need to check this packet? */ if (!sp) return true; if (unlikely(!sp->len)) { netdev_err(ns->netdev, "no xfrm state len = %d\n", sp->len); return false; } xs = xfrm_input_state(skb); if (unlikely(!xs)) { netdev_err(ns->netdev, "no xfrm_input_state() xs = %p\n", xs); return false; } sa_idx = xs->xso.offload_handle & ~NSIM_IPSEC_VALID; if (unlikely(sa_idx >= NSIM_IPSEC_MAX_SA_COUNT)) { netdev_err(ns->netdev, "bad sa_idx=%d max=%d\n", sa_idx, NSIM_IPSEC_MAX_SA_COUNT); return false; } tsa = &ipsec->sa[sa_idx]; if (unlikely(!tsa->used)) { netdev_err(ns->netdev, "unused sa_idx=%d\n", sa_idx); return false; } if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) { netdev_err(ns->netdev, "unexpected proto=%d\n", xs->id.proto); return false; } ipsec->tx++; return true; } void nsim_ipsec_init(struct netdevsim *ns) { ns->netdev->xfrmdev_ops = &nsim_xfrmdev_ops; #define NSIM_ESP_FEATURES (NETIF_F_HW_ESP | \ NETIF_F_HW_ESP_TX_CSUM | \ NETIF_F_GSO_ESP) ns->netdev->features |= NSIM_ESP_FEATURES; ns->netdev->hw_features |= NSIM_ESP_FEATURES; ns->netdev->hw_enc_features |= NSIM_ESP_FEATURES; ns->ipsec.pfile = debugfs_create_file("ipsec", 0400, ns->nsim_dev_port->ddir, ns, &ipsec_dbg_fops); } void nsim_ipsec_teardown(struct netdevsim *ns) { struct nsim_ipsec *ipsec = &ns->ipsec; if (ipsec->count) netdev_err(ns->netdev, "tearing down IPsec offload with %d SAs left\n", ipsec->count); debugfs_remove_recursive(ipsec->pfile); }
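As a companion to nsim_ipsec_parse_proto_keys() above: the only accepted algorithm is rfc4106(gcm(aes)), whose alg_key blob is a 128-bit AES key optionally followed by a 32-bit salt (an alg_key_len of 160 bits). The sketch below is a standalone illustration of that split with made-up names; it is not netdevsim code.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define GCM_KEY_BITS 128	/* matches NSIM_IPSEC_AUTH_BITS in the driver */

/* Split an rfc4106(gcm(aes)) key blob into a 128-bit key and an optional
 * 32-bit salt, the same layout the driver assumes.  Returns 0 on success,
 * -1 for unsupported key lengths. */
static int split_gcm_key(const unsigned char *key_data, int key_len_bits,
			 uint32_t key_out[4], uint32_t *salt_out)
{
	if (key_len_bits > GCM_KEY_BITS)
		memcpy(salt_out, key_data + 16, 4);	/* trailing 32-bit salt */
	else if (key_len_bits == GCM_KEY_BITS)
		*salt_out = 0;				/* no salt supplied */
	else
		return -1;				/* key too short */

	memcpy(key_out, key_data, 16);			/* 128-bit AES key */
	return 0;
}

int main(void)
{
	static const unsigned char blob[20] = {
		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,	/* key bytes 0-7 */
		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,	/* key bytes 8-15 */
		0xde, 0xad, 0xbe, 0xef,				/* 32-bit salt */
	};
	uint32_t key[4], salt;

	if (!split_gcm_key(blob, 160, key, &salt))
		printf("key[0]=0x%08x salt=0x%08x\n", key[0], salt);
	return 0;
}

A 128-bit alg_key with no salt is also accepted and simply leaves the salt at zero, which is the behaviour the driver exposes for that case.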
// SPDX-License-Identifier: GPL-2.0 /* * gendisk handling * * Portions Copyright (C) 2020 Christoph Hellwig */ #include <linux/module.h> #include <linux/ctype.h> #include <linux/fs.h> #include <linux/kdev_t.h> #include <linux/kernel.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/init.h> #include <linux/spinlock.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/kmod.h> #include <linux/major.h> #include <linux/mutex.h> #include <linux/idr.h> #include <linux/log2.h> #include <linux/pm_runtime.h> #include <linux/badblocks.h> #include <linux/part_stat.h> #include <linux/blktrace_api.h> #include "blk-throttle.h" #include "blk.h" #include "blk-mq-sched.h" #include "blk-rq-qos.h" #include "blk-cgroup.h" static struct kobject *block_depr; /* * Unique, monotonically increasing sequential number associated with block * devices instances (i.e. incremented each time a device is attached). * Associating uevents with block devices in userspace is difficult and racy: * the uevent netlink socket is lossy, and on slow and overloaded systems has * a very high latency. * Block devices do not have exclusive owners in userspace, any process can set * one up (e.g. loop devices). Moreover, device names can be reused (e.g. loop0 * can be reused again and again). * A userspace process setting up a block device and watching for its events * cannot thus reliably tell whether an event relates to the device it just set * up or another earlier instance with the same name. * This sequential number allows userspace processes to solve this problem, and * uniquely associate an uevent to the lifetime to a device. */ static atomic64_t diskseq; /* for extended dynamic devt allocation, currently only one major is used */ #define NR_EXT_DEVT (1 << MINORBITS) static DEFINE_IDA(ext_devt_ida); void set_capacity(struct gendisk *disk, sector_t sectors) { if (sectors > BLK_DEV_MAX_SECTORS) { pr_warn_once("%s: truncate capacity from %lld to %lld\n", disk->disk_name, sectors, BLK_DEV_MAX_SECTORS); sectors = BLK_DEV_MAX_SECTORS; } bdev_set_nr_sectors(disk->part0, sectors); } EXPORT_SYMBOL(set_capacity); /* * Set disk capacity and notify if the size is not currently zero and will not * be set to zero. Returns true if a uevent was sent, otherwise false. */ bool set_capacity_and_notify(struct gendisk *disk, sector_t size) { sector_t capacity = get_capacity(disk); char *envp[] = { "RESIZE=1", NULL }; set_capacity(disk, size); /* * Only print a message and send a uevent if the gendisk is user visible * and alive. This avoids spamming the log and udev when setting the * initial capacity during probing. */ if (size == capacity || !disk_live(disk) || (disk->flags & GENHD_FL_HIDDEN)) return false; pr_info_ratelimited("%s: detected capacity change from %lld to %lld\n", disk->disk_name, capacity, size); /* * Historically we did not send a uevent for changes to/from an empty * device. 
*/ if (!capacity || !size) return false; kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); return true; } EXPORT_SYMBOL_GPL(set_capacity_and_notify); static void part_stat_read_all(struct block_device *part, struct disk_stats *stat) { int cpu; memset(stat, 0, sizeof(struct disk_stats)); for_each_possible_cpu(cpu) { struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu); int group; for (group = 0; group < NR_STAT_GROUPS; group++) { stat->nsecs[group] += ptr->nsecs[group]; stat->sectors[group] += ptr->sectors[group]; stat->ios[group] += ptr->ios[group]; stat->merges[group] += ptr->merges[group]; } stat->io_ticks += ptr->io_ticks; } } static void bdev_count_inflight_rw(struct block_device *part, unsigned int inflight[2], bool mq_driver) { int write = 0; int read = 0; int cpu; if (mq_driver) { blk_mq_in_driver_rw(part, inflight); return; } for_each_possible_cpu(cpu) { read += part_stat_local_read_cpu(part, in_flight[READ], cpu); write += part_stat_local_read_cpu(part, in_flight[WRITE], cpu); } /* * While iterating all CPUs, some IOs may be issued from a CPU already * traversed and complete on a CPU that has not yet been traversed, * causing the inflight number to be negative. */ inflight[READ] = read > 0 ? read : 0; inflight[WRITE] = write > 0 ? write : 0; } /** * bdev_count_inflight - get the number of inflight IOs for a block device. * * @part: the block device. * * Inflight here means started IO accounting, from bdev_start_io_acct() for * bio-based block device, and from blk_account_io_start() for rq-based block * device. */ unsigned int bdev_count_inflight(struct block_device *part) { unsigned int inflight[2] = {0}; bdev_count_inflight_rw(part, inflight, false); return inflight[READ] + inflight[WRITE]; } EXPORT_SYMBOL_GPL(bdev_count_inflight); /* * Can be deleted altogether. Later. * */ #define BLKDEV_MAJOR_HASH_SIZE 255 static struct blk_major_name { struct blk_major_name *next; int major; char name[16]; #ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD void (*probe)(dev_t devt); #endif } *major_names[BLKDEV_MAJOR_HASH_SIZE]; static DEFINE_MUTEX(major_names_lock); static DEFINE_SPINLOCK(major_names_spinlock); /* index in the above - for now: assume no multimajor ranges */ static inline int major_to_index(unsigned major) { return major % BLKDEV_MAJOR_HASH_SIZE; } #ifdef CONFIG_PROC_FS void blkdev_show(struct seq_file *seqf, off_t offset) { struct blk_major_name *dp; spin_lock(&major_names_spinlock); for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next) if (dp->major == offset) seq_printf(seqf, "%3d %s\n", dp->major, dp->name); spin_unlock(&major_names_spinlock); } #endif /* CONFIG_PROC_FS */ /** * __register_blkdev - register a new block device * * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If * @major = 0, try to allocate any unused major number. * @name: the name of the new block device as a zero terminated string * @probe: pre-devtmpfs / pre-udev callback used to create disks when their * pre-created device node is accessed. When a probe call uses * add_disk() and it fails the driver must cleanup resources. This * interface may soon be removed. * * The @name must be unique within the system. 
* * The return value depends on the @major input parameter: * * - if a major device number was requested in range [1..BLKDEV_MAJOR_MAX-1] * then the function returns zero on success, or a negative error code * - if any unused major number was requested with @major = 0 parameter * then the return value is the allocated major number in range * [1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise * * See Documentation/admin-guide/devices.txt for the list of allocated * major numbers. * * Use register_blkdev instead for any new code. */ int __register_blkdev(unsigned int major, const char *name, void (*probe)(dev_t devt)) { struct blk_major_name **n, *p; int index, ret = 0; mutex_lock(&major_names_lock); /* temporary */ if (major == 0) { for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) { if (major_names[index] == NULL) break; } if (index == 0) { printk("%s: failed to get major for %s\n", __func__, name); ret = -EBUSY; goto out; } major = index; ret = major; } if (major >= BLKDEV_MAJOR_MAX) { pr_err("%s: major requested (%u) is greater than the maximum (%u) for %s\n", __func__, major, BLKDEV_MAJOR_MAX-1, name); ret = -EINVAL; goto out; } p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL); if (p == NULL) { ret = -ENOMEM; goto out; } p->major = major; #ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD p->probe = probe; #endif strscpy(p->name, name, sizeof(p->name)); p->next = NULL; index = major_to_index(major); spin_lock(&major_names_spinlock); for (n = &major_names[index]; *n; n = &(*n)->next) { if ((*n)->major == major) break; } if (!*n) *n = p; else ret = -EBUSY; spin_unlock(&major_names_spinlock); if (ret < 0) { printk("register_blkdev: cannot get major %u for %s\n", major, name); kfree(p); } out: mutex_unlock(&major_names_lock); return ret; } EXPORT_SYMBOL(__register_blkdev); void unregister_blkdev(unsigned int major, const char *name) { struct blk_major_name **n; struct blk_major_name *p = NULL; int index = major_to_index(major); mutex_lock(&major_names_lock); spin_lock(&major_names_spinlock); for (n = &major_names[index]; *n; n = &(*n)->next) if ((*n)->major == major) break; if (!*n || strcmp((*n)->name, name)) { WARN_ON(1); } else { p = *n; *n = p->next; } spin_unlock(&major_names_spinlock); mutex_unlock(&major_names_lock); kfree(p); } EXPORT_SYMBOL(unregister_blkdev); int blk_alloc_ext_minor(void) { int idx; idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT - 1, GFP_KERNEL); if (idx == -ENOSPC) return -EBUSY; return idx; } void blk_free_ext_minor(unsigned int minor) { ida_free(&ext_devt_ida, minor); } void disk_uevent(struct gendisk *disk, enum kobject_action action) { struct block_device *part; unsigned long idx; rcu_read_lock(); xa_for_each(&disk->part_tbl, idx, part) { if (bdev_is_partition(part) && !bdev_nr_sectors(part)) continue; if (!kobject_get_unless_zero(&part->bd_device.kobj)) continue; rcu_read_unlock(); kobject_uevent(bdev_kobj(part), action); put_device(&part->bd_device); rcu_read_lock(); } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(disk_uevent); int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode) { struct file *file; int ret = 0; if (!disk_has_partscan(disk)) return -EINVAL; if (disk->open_partitions) return -EBUSY; /* * If the device is opened exclusively by current thread already, it's * safe to scan partitons, otherwise, use bd_prepare_to_claim() to * synchronize with other exclusive openers and other partition * scanners. 
*/ if (!(mode & BLK_OPEN_EXCL)) { ret = bd_prepare_to_claim(disk->part0, disk_scan_partitions, NULL); if (ret) return ret; } set_bit(GD_NEED_PART_SCAN, &disk->state); file = bdev_file_open_by_dev(disk_devt(disk), mode & ~BLK_OPEN_EXCL, NULL, NULL); if (IS_ERR(file)) ret = PTR_ERR(file); else fput(file); /* * If blkdev_get_by_dev() failed early, GD_NEED_PART_SCAN is still set, * and this will cause that re-assemble partitioned raid device will * creat partition for underlying disk. */ clear_bit(GD_NEED_PART_SCAN, &disk->state); if (!(mode & BLK_OPEN_EXCL)) bd_abort_claiming(disk->part0, disk_scan_partitions); return ret; } static void add_disk_final(struct gendisk *disk) { struct device *ddev = disk_to_dev(disk); if (!(disk->flags & GENHD_FL_HIDDEN)) { /* Make sure the first partition scan will be proceed */ if (get_capacity(disk) && disk_has_partscan(disk)) set_bit(GD_NEED_PART_SCAN, &disk->state); bdev_add(disk->part0, ddev->devt); if (get_capacity(disk)) disk_scan_partitions(disk, BLK_OPEN_READ); /* * Announce the disk and partitions after all partitions are * created. (for hidden disks uevents remain suppressed forever) */ dev_set_uevent_suppress(ddev, 0); disk_uevent(disk, KOBJ_ADD); } blk_apply_bdi_limits(disk->bdi, &disk->queue->limits); disk_add_events(disk); set_bit(GD_ADDED, &disk->state); } static int __add_disk(struct device *parent, struct gendisk *disk, const struct attribute_group **groups, struct fwnode_handle *fwnode) { struct device *ddev = disk_to_dev(disk); int ret; if (WARN_ON_ONCE(bdev_nr_sectors(disk->part0) > BLK_DEV_MAX_SECTORS)) return -EINVAL; if (queue_is_mq(disk->queue)) { /* * ->submit_bio and ->poll_bio are bypassed for blk-mq drivers. */ if (disk->fops->submit_bio || disk->fops->poll_bio) return -EINVAL; } else { if (!disk->fops->submit_bio) return -EINVAL; bdev_set_flag(disk->part0, BD_HAS_SUBMIT_BIO); } /* * If the driver provides an explicit major number it also must provide * the number of minors numbers supported, and those will be used to * setup the gendisk. * Otherwise just allocate the device numbers for both the whole device * and all partitions from the extended dev_t space. 
*/ ret = -EINVAL; if (disk->major) { if (WARN_ON(!disk->minors)) goto out; if (disk->minors > DISK_MAX_PARTS) { pr_err("block: can't allocate more than %d partitions\n", DISK_MAX_PARTS); disk->minors = DISK_MAX_PARTS; } if (disk->first_minor > MINORMASK || disk->minors > MINORMASK + 1 || disk->first_minor + disk->minors > MINORMASK + 1) goto out; } else { if (WARN_ON(disk->minors)) goto out; ret = blk_alloc_ext_minor(); if (ret < 0) goto out; disk->major = BLOCK_EXT_MAJOR; disk->first_minor = ret; } /* delay uevents, until we scanned partition table */ dev_set_uevent_suppress(ddev, 1); ddev->parent = parent; ddev->groups = groups; dev_set_name(ddev, "%s", disk->disk_name); if (fwnode) device_set_node(ddev, fwnode); if (!(disk->flags & GENHD_FL_HIDDEN)) ddev->devt = MKDEV(disk->major, disk->first_minor); ret = device_add(ddev); if (ret) goto out_free_ext_minor; ret = disk_alloc_events(disk); if (ret) goto out_device_del; ret = sysfs_create_link(block_depr, &ddev->kobj, kobject_name(&ddev->kobj)); if (ret) goto out_device_del; /* * avoid probable deadlock caused by allocating memory with * GFP_KERNEL in runtime_resume callback of its all ancestor * devices */ pm_runtime_set_memalloc_noio(ddev, true); disk->part0->bd_holder_dir = kobject_create_and_add("holders", &ddev->kobj); if (!disk->part0->bd_holder_dir) { ret = -ENOMEM; goto out_del_block_link; } disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); if (!disk->slave_dir) { ret = -ENOMEM; goto out_put_holder_dir; } ret = blk_register_queue(disk); if (ret) goto out_put_slave_dir; if (!(disk->flags & GENHD_FL_HIDDEN)) { ret = bdi_register(disk->bdi, "%u:%u", disk->major, disk->first_minor); if (ret) goto out_unregister_queue; bdi_set_owner(disk->bdi, ddev); ret = sysfs_create_link(&ddev->kobj, &disk->bdi->dev->kobj, "bdi"); if (ret) goto out_unregister_bdi; } else { /* * Even if the block_device for a hidden gendisk is not * registered, it needs to have a valid bd_dev so that the * freeing of the dynamic major works. */ disk->part0->bd_dev = MKDEV(disk->major, disk->first_minor); } return 0; out_unregister_bdi: if (!(disk->flags & GENHD_FL_HIDDEN)) bdi_unregister(disk->bdi); out_unregister_queue: blk_unregister_queue(disk); rq_qos_exit(disk->queue); out_put_slave_dir: kobject_put(disk->slave_dir); disk->slave_dir = NULL; out_put_holder_dir: kobject_put(disk->part0->bd_holder_dir); out_del_block_link: sysfs_remove_link(block_depr, dev_name(ddev)); pm_runtime_set_memalloc_noio(ddev, false); out_device_del: device_del(ddev); out_free_ext_minor: if (disk->major == BLOCK_EXT_MAJOR) blk_free_ext_minor(disk->first_minor); out: return ret; } /** * add_disk_fwnode - add disk information to kernel list with fwnode * @parent: parent device for the disk * @disk: per-device partitioning information * @groups: Additional per-device sysfs groups * @fwnode: attached disk fwnode * * This function registers the partitioning information in @disk * with the kernel. Also attach a fwnode to the disk device. 
*/ int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk, const struct attribute_group **groups, struct fwnode_handle *fwnode) { struct blk_mq_tag_set *set; unsigned int memflags; int ret; if (queue_is_mq(disk->queue)) { set = disk->queue->tag_set; memflags = memalloc_noio_save(); down_read(&set->update_nr_hwq_lock); ret = __add_disk(parent, disk, groups, fwnode); up_read(&set->update_nr_hwq_lock); memalloc_noio_restore(memflags); } else { ret = __add_disk(parent, disk, groups, fwnode); } /* * add_disk_final() needn't to read `nr_hw_queues`, so move it out * of read lock `set->update_nr_hwq_lock` for avoiding unnecessary * lock dependency on `disk->open_mutex` from scanning partition. */ if (!ret) add_disk_final(disk); return ret; } EXPORT_SYMBOL_GPL(add_disk_fwnode); /** * device_add_disk - add disk information to kernel list * @parent: parent device for the disk * @disk: per-device partitioning information * @groups: Additional per-device sysfs groups * * This function registers the partitioning information in @disk * with the kernel. */ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, const struct attribute_group **groups) { return add_disk_fwnode(parent, disk, groups, NULL); } EXPORT_SYMBOL(device_add_disk); static void blk_report_disk_dead(struct gendisk *disk, bool surprise) { struct block_device *bdev; unsigned long idx; /* * On surprise disk removal, bdev_mark_dead() may call into file * systems below. Make it clear that we're expecting to not hold * disk->open_mutex. */ lockdep_assert_not_held(&disk->open_mutex); rcu_read_lock(); xa_for_each(&disk->part_tbl, idx, bdev) { if (!kobject_get_unless_zero(&bdev->bd_device.kobj)) continue; rcu_read_unlock(); bdev_mark_dead(bdev, surprise); put_device(&bdev->bd_device); rcu_read_lock(); } rcu_read_unlock(); } static bool __blk_mark_disk_dead(struct gendisk *disk) { /* * Fail any new I/O. */ if (test_and_set_bit(GD_DEAD, &disk->state)) return false; if (test_bit(GD_OWNS_QUEUE, &disk->state)) blk_queue_flag_set(QUEUE_FLAG_DYING, disk->queue); /* * Stop buffered writers from dirtying pages that can't be written out. */ set_capacity(disk, 0); /* * Prevent new I/O from crossing bio_queue_enter(). */ return blk_queue_start_drain(disk->queue); } /** * blk_mark_disk_dead - mark a disk as dead * @disk: disk to mark as dead * * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O * to this disk. */ void blk_mark_disk_dead(struct gendisk *disk) { __blk_mark_disk_dead(disk); blk_report_disk_dead(disk, true); } EXPORT_SYMBOL_GPL(blk_mark_disk_dead); static void __del_gendisk(struct gendisk *disk) { struct request_queue *q = disk->queue; struct block_device *part; unsigned long idx; bool start_drain; might_sleep(); if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN))) return; disk_del_events(disk); /* * Prevent new openers by unlinked the bdev inode. */ mutex_lock(&disk->open_mutex); xa_for_each(&disk->part_tbl, idx, part) bdev_unhash(part); mutex_unlock(&disk->open_mutex); /* * Tell the file system to write back all dirty data and shut down if * it hasn't been notified earlier. */ if (!test_bit(GD_DEAD, &disk->state)) blk_report_disk_dead(disk, false); /* * Drop all partitions now that the disk is marked dead. 
*/ mutex_lock(&disk->open_mutex); start_drain = __blk_mark_disk_dead(disk); if (start_drain) blk_freeze_acquire_lock(q); xa_for_each_start(&disk->part_tbl, idx, part, 1) drop_partition(part); mutex_unlock(&disk->open_mutex); if (!(disk->flags & GENHD_FL_HIDDEN)) { sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); /* * Unregister bdi before releasing device numbers (as they can * get reused and we'd get clashes in sysfs). */ bdi_unregister(disk->bdi); } blk_unregister_queue(disk); kobject_put(disk->part0->bd_holder_dir); kobject_put(disk->slave_dir); disk->slave_dir = NULL; part_stat_set_all(disk->part0, 0); disk->part0->bd_stamp = 0; sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); device_del(disk_to_dev(disk)); blk_mq_freeze_queue_wait(q); blk_throtl_cancel_bios(disk); blk_sync_queue(q); blk_flush_integrity(); if (queue_is_mq(q)) blk_mq_cancel_work_sync(q); rq_qos_exit(q); /* * If the disk does not own the queue, allow using passthrough requests * again. Else leave the queue frozen to fail all I/O. */ if (!test_bit(GD_OWNS_QUEUE, &disk->state)) __blk_mq_unfreeze_queue(q, true); else if (queue_is_mq(q)) blk_mq_exit_queue(q); if (start_drain) blk_unfreeze_release_lock(q); } static void disable_elv_switch(struct request_queue *q) { struct blk_mq_tag_set *set = q->tag_set; WARN_ON_ONCE(!queue_is_mq(q)); down_write(&set->update_nr_hwq_lock); blk_queue_flag_set(QUEUE_FLAG_NO_ELV_SWITCH, q); up_write(&set->update_nr_hwq_lock); } /** * del_gendisk - remove the gendisk * @disk: the struct gendisk to remove * * Removes the gendisk and all its associated resources. This deletes the * partitions associated with the gendisk, and unregisters the associated * request_queue. * * This is the counter to the respective __device_add_disk() call. * * The final removal of the struct gendisk happens when its refcount reaches 0 * with put_disk(), which should be called after del_gendisk(), if * __device_add_disk() was used. * * Drivers exist which depend on the release of the gendisk to be synchronous, * it should not be deferred. * * Context: can sleep */ void del_gendisk(struct gendisk *disk) { struct blk_mq_tag_set *set; unsigned int memflags; if (!queue_is_mq(disk->queue)) { __del_gendisk(disk); } else { set = disk->queue->tag_set; disable_elv_switch(disk->queue); memflags = memalloc_noio_save(); down_read(&set->update_nr_hwq_lock); __del_gendisk(disk); up_read(&set->update_nr_hwq_lock); memalloc_noio_restore(memflags); } } EXPORT_SYMBOL(del_gendisk); /** * invalidate_disk - invalidate the disk * @disk: the struct gendisk to invalidate * * A helper to invalidates the disk. It will clean the disk's associated * buffer/page caches and reset its internal states so that the disk * can be reused by the drivers. * * Context: can sleep */ void invalidate_disk(struct gendisk *disk) { struct block_device *bdev = disk->part0; invalidate_bdev(bdev); bdev->bd_mapping->wb_err = 0; set_capacity(disk, 0); } EXPORT_SYMBOL(invalidate_disk); /* sysfs access to bad-blocks list. 
*/ static ssize_t disk_badblocks_show(struct device *dev, struct device_attribute *attr, char *page) { struct gendisk *disk = dev_to_disk(dev); if (!disk->bb) return sysfs_emit(page, "\n"); return badblocks_show(disk->bb, page, 0); } static ssize_t disk_badblocks_store(struct device *dev, struct device_attribute *attr, const char *page, size_t len) { struct gendisk *disk = dev_to_disk(dev); if (!disk->bb) return -ENXIO; return badblocks_store(disk->bb, page, len, 0); } #ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD static bool blk_probe_dev(dev_t devt) { unsigned int major = MAJOR(devt); struct blk_major_name **n; mutex_lock(&major_names_lock); for (n = &major_names[major_to_index(major)]; *n; n = &(*n)->next) { if ((*n)->major == major && (*n)->probe) { (*n)->probe(devt); mutex_unlock(&major_names_lock); return true; } } mutex_unlock(&major_names_lock); return false; } void blk_request_module(dev_t devt) { int error; if (blk_probe_dev(devt)) return; error = request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)); /* Make old-style 2.4 aliases work */ if (error > 0) error = request_module("block-major-%d", MAJOR(devt)); if (!error) blk_probe_dev(devt); } #endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */ #ifdef CONFIG_PROC_FS /* iterator */ static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos) { loff_t skip = *pos; struct class_dev_iter *iter; struct device *dev; iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return ERR_PTR(-ENOMEM); seqf->private = iter; class_dev_iter_init(iter, &block_class, NULL, &disk_type); do { dev = class_dev_iter_next(iter); if (!dev) return NULL; } while (skip--); return dev_to_disk(dev); } static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos) { struct device *dev; (*pos)++; dev = class_dev_iter_next(seqf->private); if (dev) return dev_to_disk(dev); return NULL; } static void disk_seqf_stop(struct seq_file *seqf, void *v) { struct class_dev_iter *iter = seqf->private; /* stop is called even after start failed :-( */ if (iter) { class_dev_iter_exit(iter); kfree(iter); seqf->private = NULL; } } static void *show_partition_start(struct seq_file *seqf, loff_t *pos) { void *p; p = disk_seqf_start(seqf, pos); if (!IS_ERR_OR_NULL(p) && !*pos) seq_puts(seqf, "major minor #blocks name\n\n"); return p; } static int show_partition(struct seq_file *seqf, void *v) { struct gendisk *sgp = v; struct block_device *part; unsigned long idx; if (!get_capacity(sgp) || (sgp->flags & GENHD_FL_HIDDEN)) return 0; rcu_read_lock(); xa_for_each(&sgp->part_tbl, idx, part) { if (!bdev_nr_sectors(part)) continue; seq_printf(seqf, "%4d %7d %10llu %pg\n", MAJOR(part->bd_dev), MINOR(part->bd_dev), bdev_nr_sectors(part) >> 1, part); } rcu_read_unlock(); return 0; } static const struct seq_operations partitions_op = { .start = show_partition_start, .next = disk_seqf_next, .stop = disk_seqf_stop, .show = show_partition }; #endif static int __init genhd_device_init(void) { int error; error = class_register(&block_class); if (unlikely(error)) return error; blk_dev_init(); register_blkdev(BLOCK_EXT_MAJOR, "blkext"); /* create top-level block dir */ block_depr = kobject_create_and_add("block", NULL); return 0; } subsys_initcall(genhd_device_init); static ssize_t disk_range_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); return sysfs_emit(buf, "%d\n", disk->minors); } static ssize_t disk_ext_range_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); return 
sysfs_emit(buf, "%d\n", (disk->flags & GENHD_FL_NO_PART) ? 1 : DISK_MAX_PARTS); } static ssize_t disk_removable_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); return sysfs_emit(buf, "%d\n", (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0)); } static ssize_t disk_hidden_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); return sysfs_emit(buf, "%d\n", (disk->flags & GENHD_FL_HIDDEN ? 1 : 0)); } static ssize_t disk_ro_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); return sysfs_emit(buf, "%d\n", get_disk_ro(disk) ? 1 : 0); } ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev))); } ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf) { struct block_device *bdev = dev_to_bdev(dev); struct disk_stats stat; unsigned int inflight; inflight = bdev_count_inflight(bdev); if (inflight) { part_stat_lock(); update_io_ticks(bdev, jiffies, true); part_stat_unlock(); } part_stat_read_all(bdev, &stat); return sysfs_emit(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " "%8u %8u %8u " "%8lu %8lu %8llu %8u " "%8lu %8u" "\n", stat.ios[STAT_READ], stat.merges[STAT_READ], (unsigned long long)stat.sectors[STAT_READ], (unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC), stat.ios[STAT_WRITE], stat.merges[STAT_WRITE], (unsigned long long)stat.sectors[STAT_WRITE], (unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC), inflight, jiffies_to_msecs(stat.io_ticks), (unsigned int)div_u64(stat.nsecs[STAT_READ] + stat.nsecs[STAT_WRITE] + stat.nsecs[STAT_DISCARD] + stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC), stat.ios[STAT_DISCARD], stat.merges[STAT_DISCARD], (unsigned long long)stat.sectors[STAT_DISCARD], (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC), stat.ios[STAT_FLUSH], (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC)); } /* * Show the number of IOs issued to driver. 
* For bio-based device, started from bdev_start_io_acct(); * For rq-based device, started from blk_mq_start_request(); */ ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, char *buf) { struct block_device *bdev = dev_to_bdev(dev); struct request_queue *q = bdev_get_queue(bdev); unsigned int inflight[2] = {0}; bdev_count_inflight_rw(bdev, inflight, queue_is_mq(q)); return sysfs_emit(buf, "%8u %8u\n", inflight[READ], inflight[WRITE]); } static ssize_t disk_capability_show(struct device *dev, struct device_attribute *attr, char *buf) { dev_warn_once(dev, "the capability attribute has been deprecated.\n"); return sysfs_emit(buf, "0\n"); } static ssize_t disk_alignment_offset_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); return sysfs_emit(buf, "%d\n", bdev_alignment_offset(disk->part0)); } static ssize_t disk_discard_alignment_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); return sysfs_emit(buf, "%d\n", bdev_discard_alignment(disk->part0)); } static ssize_t diskseq_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); return sysfs_emit(buf, "%llu\n", disk->diskseq); } static ssize_t partscan_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%u\n", disk_has_partscan(dev_to_disk(dev))); } static DEVICE_ATTR(range, 0444, disk_range_show, NULL); static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL); static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL); static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL); static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL); static DEVICE_ATTR(size, 0444, part_size_show, NULL); static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL); static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL); static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL); static DEVICE_ATTR(stat, 0444, part_stat_show, NULL); static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL); static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store); static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL); static DEVICE_ATTR(partscan, 0444, partscan_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", bdev_test_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL)); } ssize_t part_fail_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int i; if (count > 0 && sscanf(buf, "%d", &i) > 0) { if (i) bdev_set_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL); else bdev_clear_flag(dev_to_bdev(dev), BD_MAKE_IT_FAIL); } return count; } static struct device_attribute dev_attr_fail = __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store); #endif /* CONFIG_FAIL_MAKE_REQUEST */ #ifdef CONFIG_FAIL_IO_TIMEOUT static struct device_attribute dev_attr_fail_timeout = __ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store); #endif static struct attribute *disk_attrs[] = { &dev_attr_range.attr, &dev_attr_ext_range.attr, &dev_attr_removable.attr, &dev_attr_hidden.attr, &dev_attr_ro.attr, &dev_attr_size.attr, &dev_attr_alignment_offset.attr, &dev_attr_discard_alignment.attr, &dev_attr_capability.attr, &dev_attr_stat.attr, &dev_attr_inflight.attr, &dev_attr_badblocks.attr, &dev_attr_events.attr, &dev_attr_events_async.attr,
&dev_attr_events_poll_msecs.attr, &dev_attr_diskseq.attr, &dev_attr_partscan.attr, #ifdef CONFIG_FAIL_MAKE_REQUEST &dev_attr_fail.attr, #endif #ifdef CONFIG_FAIL_IO_TIMEOUT &dev_attr_fail_timeout.attr, #endif NULL }; static umode_t disk_visible(struct kobject *kobj, struct attribute *a, int n) { struct device *dev = container_of(kobj, typeof(*dev), kobj); struct gendisk *disk = dev_to_disk(dev); if (a == &dev_attr_badblocks.attr && !disk->bb) return 0; return a->mode; } static struct attribute_group disk_attr_group = { .attrs = disk_attrs, .is_visible = disk_visible, }; static const struct attribute_group *disk_attr_groups[] = { &disk_attr_group, #ifdef CONFIG_BLK_DEV_IO_TRACE &blk_trace_attr_group, #endif #ifdef CONFIG_BLK_DEV_INTEGRITY &blk_integrity_attr_group, #endif NULL }; /** * disk_release - releases all allocated resources of the gendisk * @dev: the device representing this disk * * This function releases all allocated resources of the gendisk. * * Drivers which used __device_add_disk() have a gendisk with a request_queue * assigned. Since the request_queue sits on top of the gendisk for these * drivers we also call blk_put_queue() for them, and we expect the * request_queue refcount to reach 0 at this point, and so the request_queue * will also be freed prior to the disk. * * Context: can sleep */ static void disk_release(struct device *dev) { struct gendisk *disk = dev_to_disk(dev); might_sleep(); WARN_ON_ONCE(disk_live(disk)); blk_trace_remove(disk->queue); /* * To undo all the initialization from blk_mq_init_allocated_queue in * case of a probe failure where add_disk is never called, we have to * call blk_mq_exit_queue here. We can't do this for the more common * teardown case (yet) as the tagset can be gone by the time the disk * is released once it was added. */ if (queue_is_mq(disk->queue) && test_bit(GD_OWNS_QUEUE, &disk->state) && !test_bit(GD_ADDED, &disk->state)) blk_mq_exit_queue(disk->queue); blkcg_exit_disk(disk); bioset_exit(&disk->bio_split); disk_release_events(disk); kfree(disk->random); disk_free_zone_resources(disk); xa_destroy(&disk->part_tbl); kobject_put(&disk->queue_kobj); disk->queue->disk = NULL; blk_put_queue(disk->queue); if (test_bit(GD_ADDED, &disk->state) && disk->fops->free_disk) disk->fops->free_disk(disk); bdev_drop(disk->part0); /* frees the disk */ } static int block_uevent(const struct device *dev, struct kobj_uevent_env *env) { const struct gendisk *disk = dev_to_disk(dev); return add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq); } const struct class block_class = { .name = "block", .dev_uevent = block_uevent, }; static char *block_devnode(const struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid) { struct gendisk *disk = dev_to_disk(dev); if (disk->fops->devnode) return disk->fops->devnode(disk, mode); return NULL; } const struct device_type disk_type = { .name = "disk", .groups = disk_attr_groups, .release = disk_release, .devnode = block_devnode, }; #ifdef CONFIG_PROC_FS /* * aggregate disk stat collector. Uses the same stats that the sysfs * entries do, above, but makes them available through one seq_file. * * The output looks suspiciously like /proc/partitions with a bunch of * extra fields.
*/ static int diskstats_show(struct seq_file *seqf, void *v) { struct gendisk *gp = v; struct block_device *hd; unsigned int inflight; struct disk_stats stat; unsigned long idx; /* if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) seq_puts(seqf, "major minor name" " rio rmerge rsect ruse wio wmerge " "wsect wuse running use aveq" "\n\n"); */ rcu_read_lock(); xa_for_each(&gp->part_tbl, idx, hd) { if (bdev_is_partition(hd) && !bdev_nr_sectors(hd)) continue; inflight = bdev_count_inflight(hd); if (inflight) { part_stat_lock(); update_io_ticks(hd, jiffies, true); part_stat_unlock(); } part_stat_read_all(hd, &stat); seq_put_decimal_ull_width(seqf, "", MAJOR(hd->bd_dev), 4); seq_put_decimal_ull_width(seqf, " ", MINOR(hd->bd_dev), 7); seq_printf(seqf, " %pg", hd); seq_put_decimal_ull(seqf, " ", stat.ios[STAT_READ]); seq_put_decimal_ull(seqf, " ", stat.merges[STAT_READ]); seq_put_decimal_ull(seqf, " ", stat.sectors[STAT_READ]); seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC)); seq_put_decimal_ull(seqf, " ", stat.ios[STAT_WRITE]); seq_put_decimal_ull(seqf, " ", stat.merges[STAT_WRITE]); seq_put_decimal_ull(seqf, " ", stat.sectors[STAT_WRITE]); seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC)); seq_put_decimal_ull(seqf, " ", inflight); seq_put_decimal_ull(seqf, " ", jiffies_to_msecs(stat.io_ticks)); seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_READ] + stat.nsecs[STAT_WRITE] + stat.nsecs[STAT_DISCARD] + stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC)); seq_put_decimal_ull(seqf, " ", stat.ios[STAT_DISCARD]); seq_put_decimal_ull(seqf, " ", stat.merges[STAT_DISCARD]); seq_put_decimal_ull(seqf, " ", stat.sectors[STAT_DISCARD]); seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC)); seq_put_decimal_ull(seqf, " ", stat.ios[STAT_FLUSH]); seq_put_decimal_ull(seqf, " ", (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC)); seq_putc(seqf, '\n'); } rcu_read_unlock(); return 0; } static const struct seq_operations diskstats_op = { .start = disk_seqf_start, .next = disk_seqf_next, .stop = disk_seqf_stop, .show = diskstats_show }; static int __init proc_genhd_init(void) { proc_create_seq("diskstats", 0, NULL, &diskstats_op); proc_create_seq("partitions", 0, NULL, &partitions_op); return 0; } module_init(proc_genhd_init); #endif /* CONFIG_PROC_FS */ dev_t part_devt(struct gendisk *disk, u8 partno) { struct block_device *part; dev_t devt = 0; rcu_read_lock(); part = xa_load(&disk->part_tbl, partno); if (part) devt = part->bd_dev; rcu_read_unlock(); return devt; } struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, struct lock_class_key *lkclass) { struct gendisk *disk; disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id); if (!disk) return NULL; if (bioset_init(&disk->bio_split, BIO_POOL_SIZE, 0, 0)) goto out_free_disk; disk->bdi = bdi_alloc(node_id); if (!disk->bdi) goto out_free_bioset; /* bdev_alloc() might need the queue, set before the first call */ disk->queue = q; disk->part0 = bdev_alloc(disk, 0); if (!disk->part0) goto out_free_bdi; disk->node_id = node_id; mutex_init(&disk->open_mutex); xa_init(&disk->part_tbl); if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL)) goto out_destroy_part_tbl; if (blkcg_init_disk(disk)) goto out_erase_part0; disk_init_zone_resources(disk); rand_initialize_disk(disk); disk_to_dev(disk)->class = &block_class; disk_to_dev(disk)->type = &disk_type; 
device_initialize(disk_to_dev(disk)); inc_diskseq(disk); q->disk = disk; lockdep_init_map(&disk->lockdep_map, "(bio completion)", lkclass, 0); #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED INIT_LIST_HEAD(&disk->slave_bdevs); #endif mutex_init(&disk->rqos_state_mutex); kobject_init(&disk->queue_kobj, &blk_queue_ktype); return disk; out_erase_part0: xa_erase(&disk->part_tbl, 0); out_destroy_part_tbl: xa_destroy(&disk->part_tbl); disk->part0->bd_disk = NULL; bdev_drop(disk->part0); out_free_bdi: bdi_put(disk->bdi); out_free_bioset: bioset_exit(&disk->bio_split); out_free_disk: kfree(disk); return NULL; } struct gendisk *__blk_alloc_disk(struct queue_limits *lim, int node, struct lock_class_key *lkclass) { struct queue_limits default_lim = { }; struct request_queue *q; struct gendisk *disk; q = blk_alloc_queue(lim ? lim : &default_lim, node); if (IS_ERR(q)) return ERR_CAST(q); disk = __alloc_disk_node(q, node, lkclass); if (!disk) { blk_put_queue(q); return ERR_PTR(-ENOMEM); } set_bit(GD_OWNS_QUEUE, &disk->state); return disk; } EXPORT_SYMBOL(__blk_alloc_disk); /** * put_disk - decrements the gendisk refcount * @disk: the struct gendisk to decrement the refcount for * * This decrements the refcount for the struct gendisk. When this reaches 0 * we'll have disk_release() called. * * Note: for blk-mq disk put_disk must be called before freeing the tag_set * when handling probe errors (that is before add_disk() is called). * * Context: Any context, but the last reference must not be dropped from * atomic context. */ void put_disk(struct gendisk *disk) { if (disk) put_device(disk_to_dev(disk)); } EXPORT_SYMBOL(put_disk); static void set_disk_ro_uevent(struct gendisk *gd, int ro) { char event[] = "DISK_RO=1"; char *envp[] = { event, NULL }; if (!ro) event[8] = '0'; kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); } /** * set_disk_ro - set a gendisk read-only * @disk: gendisk to operate on * @read_only: %true to set the disk read-only, %false set the disk read/write * * This function is used to indicate whether a given disk device should have its * read-only flag set. set_disk_ro() is typically used by device drivers to * indicate whether the underlying physical device is write-protected. */ void set_disk_ro(struct gendisk *disk, bool read_only) { if (read_only) { if (test_and_set_bit(GD_READ_ONLY, &disk->state)) return; } else { if (!test_and_clear_bit(GD_READ_ONLY, &disk->state)) return; } set_disk_ro_uevent(disk, read_only); } EXPORT_SYMBOL(set_disk_ro); void inc_diskseq(struct gendisk *disk) { disk->diskseq = atomic64_inc_return(&diskseq); }
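As a reading aid for part_stat_show() above: it emits 17 whitespace-separated counters per device, in the same order as the /proc/diskstats columns printed by diskstats_show(). The sketch below is a minimal userspace illustration, not part of the kernel tree; the device name "sda" is an assumption.

/* Userspace sketch (illustration only): parse the 17 counters emitted by
 * part_stat_show() from /sys/block/sda/stat. */
#include <stdio.h>

int main(void)
{
	unsigned long long f[17];
	FILE *fp = fopen("/sys/block/sda/stat", "r");
	int i, n = 0;

	if (!fp)
		return 1;
	for (i = 0; i < 17 && fscanf(fp, "%llu", &f[i]) == 1; i++)
		n++;
	fclose(fp);

	/* Field order follows the sysfs_emit() format string above:
	 * index 0 = read I/Os, 4 = write I/Os, 8 = in-flight, 9 = io_ticks (ms). */
	if (n == 17)
		printf("reads=%llu writes=%llu inflight=%llu io_ticks_ms=%llu\n",
		       f[0], f[4], f[8], f[9]);
	return n == 17 ? 0 : 1;
}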
// SPDX-License-Identifier: GPL-2.0-only /* * Generic pidhash and scalable, time-bounded PID allocator * * (C) 2002-2003 Nadia Yvette Chambers, IBM * (C) 2004 Nadia Yvette Chambers, Oracle * (C) 2002-2004 Ingo Molnar, Red Hat * * pid-structures are backing objects for tasks sharing a given ID to chain * against. There is very little to them aside from hashing them and * parking tasks using given ID's on a list. * * The hash is always changed with the tasklist_lock write-acquired, * and the hash is only accessed with the tasklist_lock at least * read-acquired, so there's no additional SMP locking needed here. * * We have a list of bitmap pages, which bitmaps represent the PID space. * Allocating and freeing PIDs is completely lockless. The worst-case * allocation scenario when all but one out of 1 million PIDs possible are * allocated already: the scanning of 32 list entries and at most PAGE_SIZE * bytes. The typical fastpath is a single successful setbit. Freeing is O(1). * * Pid namespaces: * (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc. * (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM * Many thanks to Oleg Nesterov for comments and help * */ #include <linux/mm.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/rculist.h> #include <linux/memblock.h> #include <linux/pid_namespace.h> #include <linux/init_task.h> #include <linux/syscalls.h> #include <linux/proc_ns.h> #include <linux/refcount.h> #include <linux/anon_inodes.h> #include <linux/sched/signal.h> #include <linux/sched/task.h> #include <linux/idr.h> #include <linux/pidfs.h> #include <linux/seqlock.h> #include <net/sock.h> #include <uapi/linux/pidfd.h> struct pid init_struct_pid = { .count = REFCOUNT_INIT(1), .tasks = { { .first = NULL }, { .first = NULL }, { .first = NULL }, }, .level = 0, .numbers = { { .nr = 0, .ns = &init_pid_ns, }, } }; static int pid_max_min = RESERVED_PIDS + 1; static int pid_max_max = PID_MAX_LIMIT; /* * PID-map pages start out as NULL, they get allocated upon * first use and are never deallocated. This way a low pid_max * value does not cause lots of bitmaps to be allocated, but * the scheme scales to up to 4 million PIDs, runtime.
*/ struct pid_namespace init_pid_ns = { .ns = NS_COMMON_INIT(init_pid_ns), .idr = IDR_INIT(init_pid_ns.idr), .pid_allocated = PIDNS_ADDING, .level = 0, .child_reaper = &init_task, .user_ns = &init_user_ns, .pid_max = PID_MAX_DEFAULT, #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) .memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC, #endif }; EXPORT_SYMBOL_GPL(init_pid_ns); static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); seqcount_spinlock_t pidmap_lock_seq = SEQCNT_SPINLOCK_ZERO(pidmap_lock_seq, &pidmap_lock); void put_pid(struct pid *pid) { struct pid_namespace *ns; if (!pid) return; ns = pid->numbers[pid->level].ns; if (refcount_dec_and_test(&pid->count)) { pidfs_free_pid(pid); kmem_cache_free(ns->pid_cachep, pid); put_pid_ns(ns); } } EXPORT_SYMBOL_GPL(put_pid); static void delayed_put_pid(struct rcu_head *rhp) { struct pid *pid = container_of(rhp, struct pid, rcu); put_pid(pid); } void free_pid(struct pid *pid) { int i; struct pid_namespace *active_ns; lockdep_assert_not_held(&tasklist_lock); active_ns = pid->numbers[pid->level].ns; ns_ref_active_put(active_ns); spin_lock(&pidmap_lock); for (i = 0; i <= pid->level; i++) { struct upid *upid = pid->numbers + i; struct pid_namespace *ns = upid->ns; switch (--ns->pid_allocated) { case 2: case 1: /* When all that is left in the pid namespace * is the reaper wake up the reaper. The reaper * may be sleeping in zap_pid_ns_processes(). */ wake_up_process(ns->child_reaper); break; case PIDNS_ADDING: /* Handle a fork failure of the first process */ WARN_ON(ns->child_reaper); ns->pid_allocated = 0; break; } idr_remove(&ns->idr, upid->nr); } pidfs_remove_pid(pid); spin_unlock(&pidmap_lock); call_rcu(&pid->rcu, delayed_put_pid); } void free_pids(struct pid **pids) { int tmp; /* * This can batch pidmap_lock. */ for (tmp = PIDTYPE_MAX; --tmp >= 0; ) if (pids[tmp]) free_pid(pids[tmp]); } struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, size_t set_tid_size) { struct pid *pid; enum pid_type type; int i, nr; struct pid_namespace *tmp; struct upid *upid; int retval = -ENOMEM; /* * set_tid_size contains the size of the set_tid array. Starting at * the most nested currently active PID namespace it tells alloc_pid() * which PID to set for a process in that most nested PID namespace * up to set_tid_size PID namespaces. It does not have to set the PID * for a process in all nested PID namespaces but set_tid_size must * never be greater than the current ns->level + 1. */ if (set_tid_size > ns->level + 1) return ERR_PTR(-EINVAL); pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL); if (!pid) return ERR_PTR(retval); tmp = ns; pid->level = ns->level; for (i = ns->level; i >= 0; i--) { int tid = 0; int pid_max = READ_ONCE(tmp->pid_max); if (set_tid_size) { tid = set_tid[ns->level - i]; retval = -EINVAL; if (tid < 1 || tid >= pid_max) goto out_free; /* * Also fail if a PID != 1 is requested and * no PID 1 exists. */ if (tid != 1 && !tmp->child_reaper) goto out_free; retval = -EPERM; if (!checkpoint_restore_ns_capable(tmp->user_ns)) goto out_free; set_tid_size--; } idr_preload(GFP_KERNEL); spin_lock(&pidmap_lock); if (tid) { nr = idr_alloc(&tmp->idr, NULL, tid, tid + 1, GFP_ATOMIC); /* * If ENOSPC is returned it means that the PID is * already in use. Return EEXIST in that case.
*/ if (nr == -ENOSPC) nr = -EEXIST; } else { int pid_min = 1; /* * init really needs pid 1, but after reaching the * maximum wrap back to RESERVED_PIDS */ if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS) pid_min = RESERVED_PIDS; /* * Store a null pointer so find_pid_ns does not find * a partially initialized PID (see below). */ nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min, pid_max, GFP_ATOMIC); } spin_unlock(&pidmap_lock); idr_preload_end(); if (nr < 0) { retval = (nr == -ENOSPC) ? -EAGAIN : nr; goto out_free; } pid->numbers[i].nr = nr; pid->numbers[i].ns = tmp; tmp = tmp->parent; } /* * ENOMEM is not the most obvious choice especially for the case * where the child subreaper has already exited and the pid * namespace denies the creation of any new processes. But ENOMEM * is what we have exposed to userspace for a long time and it is * documented behavior for pid namespaces. So we can't easily * change it even if there were an error code better suited. */ retval = -ENOMEM; get_pid_ns(ns); refcount_set(&pid->count, 1); spin_lock_init(&pid->lock); for (type = 0; type < PIDTYPE_MAX; ++type) INIT_HLIST_HEAD(&pid->tasks[type]); init_waitqueue_head(&pid->wait_pidfd); INIT_HLIST_HEAD(&pid->inodes); upid = pid->numbers + ns->level; idr_preload(GFP_KERNEL); spin_lock(&pidmap_lock); if (!(ns->pid_allocated & PIDNS_ADDING)) goto out_unlock; pidfs_add_pid(pid); for ( ; upid >= pid->numbers; --upid) { /* Make the PID visible to find_pid_ns. */ idr_replace(&upid->ns->idr, pid, upid->nr); upid->ns->pid_allocated++; } spin_unlock(&pidmap_lock); idr_preload_end(); ns_ref_active_get(ns); return pid; out_unlock: spin_unlock(&pidmap_lock); idr_preload_end(); put_pid_ns(ns); out_free: spin_lock(&pidmap_lock); while (++i <= ns->level) { upid = pid->numbers + i; idr_remove(&upid->ns->idr, upid->nr); } /* On failure to allocate the first pid, reset the state */ if (ns->pid_allocated == PIDNS_ADDING) idr_set_cursor(&ns->idr, 0); spin_unlock(&pidmap_lock); kmem_cache_free(ns->pid_cachep, pid); return ERR_PTR(retval); } void disable_pid_allocation(struct pid_namespace *ns) { spin_lock(&pidmap_lock); ns->pid_allocated &= ~PIDNS_ADDING; spin_unlock(&pidmap_lock); } struct pid *find_pid_ns(int nr, struct pid_namespace *ns) { return idr_find(&ns->idr, nr); } EXPORT_SYMBOL_GPL(find_pid_ns); struct pid *find_vpid(int nr) { return find_pid_ns(nr, task_active_pid_ns(current)); } EXPORT_SYMBOL_GPL(find_vpid); static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type) { return (type == PIDTYPE_PID) ? &task->thread_pid : &task->signal->pids[type]; } /* * attach_pid() must be called with the tasklist_lock write-held. 
*/ void attach_pid(struct task_struct *task, enum pid_type type) { struct pid *pid; lockdep_assert_held_write(&tasklist_lock); pid = *task_pid_ptr(task, type); hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]); } static void __change_pid(struct pid **pids, struct task_struct *task, enum pid_type type, struct pid *new) { struct pid **pid_ptr, *pid; int tmp; lockdep_assert_held_write(&tasklist_lock); pid_ptr = task_pid_ptr(task, type); pid = *pid_ptr; hlist_del_rcu(&task->pid_links[type]); *pid_ptr = new; for (tmp = PIDTYPE_MAX; --tmp >= 0; ) if (pid_has_task(pid, tmp)) return; WARN_ON(pids[type]); pids[type] = pid; } void detach_pid(struct pid **pids, struct task_struct *task, enum pid_type type) { __change_pid(pids, task, type, NULL); } void change_pid(struct pid **pids, struct task_struct *task, enum pid_type type, struct pid *pid) { __change_pid(pids, task, type, pid); attach_pid(task, type); } void exchange_tids(struct task_struct *left, struct task_struct *right) { struct pid *pid1 = left->thread_pid; struct pid *pid2 = right->thread_pid; struct hlist_head *head1 = &pid1->tasks[PIDTYPE_PID]; struct hlist_head *head2 = &pid2->tasks[PIDTYPE_PID]; lockdep_assert_held_write(&tasklist_lock); /* Swap the single entry tid lists */ hlists_swap_heads_rcu(head1, head2); /* Swap the per task_struct pid */ rcu_assign_pointer(left->thread_pid, pid2); rcu_assign_pointer(right->thread_pid, pid1); /* Swap the cached value */ WRITE_ONCE(left->pid, pid_nr(pid2)); WRITE_ONCE(right->pid, pid_nr(pid1)); } /* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */ void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type type) { WARN_ON_ONCE(type == PIDTYPE_PID); lockdep_assert_held_write(&tasklist_lock); hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]); } struct task_struct *pid_task(struct pid *pid, enum pid_type type) { struct task_struct *result = NULL; if (pid) { struct hlist_node *first; first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), lockdep_tasklist_lock_is_held()); if (first) result = hlist_entry(first, struct task_struct, pid_links[(type)]); } return result; } EXPORT_SYMBOL(pid_task); /* * Must be called under rcu_read_lock(). 
*/ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) { RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "find_task_by_pid_ns() needs rcu_read_lock() protection"); return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); } struct task_struct *find_task_by_vpid(pid_t vnr) { return find_task_by_pid_ns(vnr, task_active_pid_ns(current)); } struct task_struct *find_get_task_by_vpid(pid_t nr) { struct task_struct *task; rcu_read_lock(); task = find_task_by_vpid(nr); if (task) get_task_struct(task); rcu_read_unlock(); return task; } struct pid *get_task_pid(struct task_struct *task, enum pid_type type) { struct pid *pid; rcu_read_lock(); pid = get_pid(rcu_dereference(*task_pid_ptr(task, type))); rcu_read_unlock(); return pid; } EXPORT_SYMBOL_GPL(get_task_pid); struct task_struct *get_pid_task(struct pid *pid, enum pid_type type) { struct task_struct *result; rcu_read_lock(); result = pid_task(pid, type); if (result) get_task_struct(result); rcu_read_unlock(); return result; } EXPORT_SYMBOL_GPL(get_pid_task); struct pid *find_get_pid(pid_t nr) { struct pid *pid; rcu_read_lock(); pid = get_pid(find_vpid(nr)); rcu_read_unlock(); return pid; } EXPORT_SYMBOL_GPL(find_get_pid); pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) { struct upid *upid; pid_t nr = 0; if (pid && ns && ns->level <= pid->level) { upid = &pid->numbers[ns->level]; if (upid->ns == ns) nr = upid->nr; } return nr; } EXPORT_SYMBOL_GPL(pid_nr_ns); pid_t pid_vnr(struct pid *pid) { return pid_nr_ns(pid, task_active_pid_ns(current)); } EXPORT_SYMBOL_GPL(pid_vnr); pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, struct pid_namespace *ns) { pid_t nr = 0; rcu_read_lock(); if (!ns) ns = task_active_pid_ns(current); if (ns) nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns); rcu_read_unlock(); return nr; } EXPORT_SYMBOL(__task_pid_nr_ns); struct pid_namespace *task_active_pid_ns(struct task_struct *tsk) { return ns_of_pid(task_pid(tsk)); } EXPORT_SYMBOL_GPL(task_active_pid_ns); /* * Used by proc to find the first pid that is greater than or equal to nr. * * If there is a pid at nr this function is exactly the same as find_pid_ns. */ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) { return idr_get_next(&ns->idr, &nr); } EXPORT_SYMBOL_GPL(find_ge_pid); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) { CLASS(fd, f)(fd); struct pid *pid; if (fd_empty(f)) return ERR_PTR(-EBADF); pid = pidfd_pid(fd_file(f)); if (!IS_ERR(pid)) { get_pid(pid); *flags = fd_file(f)->f_flags; } return pid; } /** * pidfd_get_task() - Get the task associated with a pidfd * * @pidfd: pidfd for which to get the task * @flags: flags associated with this pidfd * * Return the task associated with @pidfd. The function takes a reference on * the returned task. The caller is responsible for releasing that reference. * * Return: On success, the task_struct associated with the pidfd. * On error, a negative errno number will be returned. 
*/ struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags) { unsigned int f_flags = 0; struct pid *pid; struct task_struct *task; enum pid_type type; switch (pidfd) { case PIDFD_SELF_THREAD: type = PIDTYPE_PID; pid = get_task_pid(current, type); break; case PIDFD_SELF_THREAD_GROUP: type = PIDTYPE_TGID; pid = get_task_pid(current, type); break; default: pid = pidfd_get_pid(pidfd, &f_flags); if (IS_ERR(pid)) return ERR_CAST(pid); type = PIDTYPE_TGID; break; } task = get_pid_task(pid, type); put_pid(pid); if (!task) return ERR_PTR(-ESRCH); *flags = f_flags; return task; } /** * pidfd_create() - Create a new pid file descriptor. * * @pid: struct pid that the pidfd will reference * @flags: flags to pass * * This creates a new pid file descriptor with the O_CLOEXEC flag set. * * Note, that this function can only be called after the fd table has * been unshared to avoid leaking the pidfd to the new process. * * This symbol should not be explicitly exported to loadable modules. * * Return: On success, a cloexec pidfd is returned. * On error, a negative errno number will be returned. */ static int pidfd_create(struct pid *pid, unsigned int flags) { int pidfd; struct file *pidfd_file; pidfd = pidfd_prepare(pid, flags, &pidfd_file); if (pidfd < 0) return pidfd; fd_install(pidfd, pidfd_file); return pidfd; } /** * sys_pidfd_open() - Open new pid file descriptor. * * @pid: pid for which to retrieve a pidfd * @flags: flags to pass * * This creates a new pid file descriptor with the O_CLOEXEC flag set for * the task identified by @pid. Without PIDFD_THREAD flag the target task * must be a thread-group leader. * * Return: On success, a cloexec pidfd is returned. * On error, a negative errno number will be returned. */ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) { int fd; struct pid *p; if (flags & ~(PIDFD_NONBLOCK | PIDFD_THREAD)) return -EINVAL; if (pid <= 0) return -EINVAL; p = find_get_pid(pid); if (!p) return -ESRCH; fd = pidfd_create(p, flags); put_pid(p); return fd; } #ifdef CONFIG_SYSCTL static struct ctl_table_set *pid_table_root_lookup(struct ctl_table_root *root) { return &task_active_pid_ns(current)->set; } static int set_is_seen(struct ctl_table_set *set) { return &task_active_pid_ns(current)->set == set; } static int pid_table_root_permissions(struct ctl_table_header *head, const struct ctl_table *table) { struct pid_namespace *pidns = container_of(head->set, struct pid_namespace, set); int mode = table->mode; if (ns_capable_noaudit(pidns->user_ns, CAP_SYS_ADMIN) || uid_eq(current_euid(), make_kuid(pidns->user_ns, 0))) mode = (mode & S_IRWXU) >> 6; else if (in_egroup_p(make_kgid(pidns->user_ns, 0))) mode = (mode & S_IRWXG) >> 3; else mode = mode & S_IROTH; return (mode << 6) | (mode << 3) | mode; } static void pid_table_root_set_ownership(struct ctl_table_header *head, kuid_t *uid, kgid_t *gid) { struct pid_namespace *pidns = container_of(head->set, struct pid_namespace, set); kuid_t ns_root_uid; kgid_t ns_root_gid; ns_root_uid = make_kuid(pidns->user_ns, 0); if (uid_valid(ns_root_uid)) *uid = ns_root_uid; ns_root_gid = make_kgid(pidns->user_ns, 0); if (gid_valid(ns_root_gid)) *gid = ns_root_gid; } static struct ctl_table_root pid_table_root = { .lookup = pid_table_root_lookup, .permissions = pid_table_root_permissions, .set_ownership = pid_table_root_set_ownership, }; static int proc_do_cad_pid(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct pid *new_pid; pid_t tmp_pid; int r; struct ctl_table tmp_table = *table; 
tmp_pid = pid_vnr(cad_pid); tmp_table.data = &tmp_pid; r = proc_dointvec(&tmp_table, write, buffer, lenp, ppos); if (r || !write) return r; new_pid = find_get_pid(tmp_pid); if (!new_pid) return -ESRCH; put_pid(xchg(&cad_pid, new_pid)); return 0; } static const struct ctl_table pid_table[] = { { .procname = "pid_max", .data = &init_pid_ns.pid_max, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &pid_max_min, .extra2 = &pid_max_max, }, #ifdef CONFIG_PROC_SYSCTL { .procname = "cad_pid", .maxlen = sizeof(int), .mode = 0600, .proc_handler = proc_do_cad_pid, }, #endif }; #endif int register_pidns_sysctls(struct pid_namespace *pidns) { #ifdef CONFIG_SYSCTL struct ctl_table *tbl; setup_sysctl_set(&pidns->set, &pid_table_root, set_is_seen); tbl = kmemdup(pid_table, sizeof(pid_table), GFP_KERNEL); if (!tbl) return -ENOMEM; tbl->data = &pidns->pid_max; pidns->pid_max = min(pid_max_max, max_t(int, pidns->pid_max, PIDS_PER_CPU_DEFAULT * num_possible_cpus())); pidns->sysctls = __register_sysctl_table(&pidns->set, "kernel", tbl, ARRAY_SIZE(pid_table)); if (!pidns->sysctls) { kfree(tbl); retire_sysctl_set(&pidns->set); return -ENOMEM; } #endif return 0; } void unregister_pidns_sysctls(struct pid_namespace *pidns) { #ifdef CONFIG_SYSCTL const struct ctl_table *tbl; tbl = pidns->sysctls->ctl_table_arg; unregister_sysctl_table(pidns->sysctls); retire_sysctl_set(&pidns->set); kfree(tbl); #endif } void __init pid_idr_init(void) { /* Verify no one has done anything silly: */ BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING); /* bump default and minimum pid_max based on number of cpus */ init_pid_ns.pid_max = min(pid_max_max, max_t(int, init_pid_ns.pid_max, PIDS_PER_CPU_DEFAULT * num_possible_cpus())); pid_max_min = max_t(int, pid_max_min, PIDS_PER_CPU_MIN * num_possible_cpus()); pr_info("pid_max: default: %u minimum: %u\n", init_pid_ns.pid_max, pid_max_min); idr_init(&init_pid_ns.idr); init_pid_ns.pid_cachep = kmem_cache_create("pid", struct_size_t(struct pid, numbers, 1), __alignof__(struct pid), SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL); } static __init int pid_namespace_sysctl_init(void) { #ifdef CONFIG_SYSCTL /* "kernel" directory will have already been initialized. */ BUG_ON(register_pidns_sysctls(&init_pid_ns)); #endif return 0; } subsys_initcall(pid_namespace_sysctl_init); static struct file *__pidfd_fget(struct task_struct *task, int fd) { struct file *file; int ret; ret = down_read_killable(&task->signal->exec_update_lock); if (ret) return ERR_PTR(ret); if (ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS)) file = fget_task(task, fd); else file = ERR_PTR(-EPERM); up_read(&task->signal->exec_update_lock); if (!file) { /* * It is possible that the target thread is exiting; it can be * either: * 1. before exit_signals(), which gives a real fd * 2. before exit_files() takes the task_lock() gives a real fd * 3. after exit_files() releases task_lock(), ->files is NULL; * this has PF_EXITING, since it was set in exit_signals(), * __pidfd_fget() returns EBADF. * In case 3 we get EBADF, but that really means ESRCH, since * the task is currently exiting and has freed its files * struct, so we fix it up. 
*/ if (task->flags & PF_EXITING) file = ERR_PTR(-ESRCH); else file = ERR_PTR(-EBADF); } return file; } static int pidfd_getfd(struct pid *pid, int fd) { struct task_struct *task; struct file *file; int ret; task = get_pid_task(pid, PIDTYPE_PID); if (!task) return -ESRCH; file = __pidfd_fget(task, fd); put_task_struct(task); if (IS_ERR(file)) return PTR_ERR(file); ret = receive_fd(file, NULL, O_CLOEXEC); fput(file); return ret; } /** * sys_pidfd_getfd() - Get a file descriptor from another process * * @pidfd: the pidfd file descriptor of the process * @fd: the file descriptor number to get * @flags: flags on how to get the fd (reserved) * * This syscall gets a copy of a file descriptor from another process * based on the pidfd, and file descriptor number. It requires that * the calling process has the ability to ptrace the process represented * by the pidfd. The process which is having its file descriptor copied * is otherwise unaffected. * * Return: On success, a cloexec file descriptor is returned. * On error, a negative errno number will be returned. */ SYSCALL_DEFINE3(pidfd_getfd, int, pidfd, int, fd, unsigned int, flags) { struct pid *pid; /* flags is currently unused - make sure it's unset */ if (flags) return -EINVAL; CLASS(fd, f)(pidfd); if (fd_empty(f)) return -EBADF; pid = pidfd_pid(fd_file(f)); if (IS_ERR(pid)) return PTR_ERR(pid); return pidfd_getfd(pid, fd); }
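The two syscalls above are easiest to see from the userspace side. The sketch below is an illustration only, not part of the tree: it opens a pidfd with pidfd_open(2), then copies a descriptor out of the target with pidfd_getfd(2), which requires ptrace-level access to the target as described in the sys_pidfd_getfd() comment. The target pid, the fd number, and the availability of SYS_pidfd_open / SYS_pidfd_getfd in the libc headers are assumptions.

/* Userspace sketch (illustration only; assumes pid 1234 exists, its fd 0 is
 * open, and <sys/syscall.h> defines SYS_pidfd_open / SYS_pidfd_getfd). */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	pid_t target = 1234;	/* assumed target process */
	int target_fd = 0;	/* fd number inside the target */
	long pidfd, fd;

	pidfd = syscall(SYS_pidfd_open, target, 0);	/* cloexec pidfd */
	if (pidfd < 0) {
		perror("pidfd_open");
		return 1;
	}

	fd = syscall(SYS_pidfd_getfd, (int)pidfd, target_fd, 0);
	if (fd < 0)
		perror("pidfd_getfd");	/* e.g. EPERM without ptrace access */
	else
		printf("local fd %ld refers to target's fd %d\n", fd, target_fd);

	close((int)pidfd);
	return fd < 0;
}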
/* SPDX-License-Identifier: GPL-2.0 */ /* * NFS internal definitions */ #include "nfs4_fs.h" #include <linux/fs_context.h> #include <linux/security.h> #include <linux/compiler_attributes.h> #include <linux/crc32.h> #include <linux/sunrpc/addr.h> #include <linux/nfs_page.h> #include <linux/nfslocalio.h> #include <linux/wait_bit.h> #define NFS_SB_MASK (SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) extern const struct export_operations nfs_export_ops; struct nfs_string; struct nfs_pageio_descriptor; static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr) { if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid)) fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT; } static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr) { if (((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) == 0) || (((fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0) && ((fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) == 0))) return 0; return 1; } static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry) { if (!(NFS_SB(dentry->d_sb)->flags & NFS_MOUNT_SOFTREVAL)) return false; if (!d_is_positive(dentry) || !NFS_FH(d_inode(dentry))->size) return false; return true; } static inline fmode_t flags_to_mode(int flags) { fmode_t res = (__force fmode_t)flags & FMODE_EXEC; if ((flags & O_ACCMODE) != O_WRONLY) res |= FMODE_READ; if ((flags & O_ACCMODE) != O_RDONLY) res |= FMODE_WRITE; return res; } /* * Note: RFC 1813 doesn't limit the number of auth flavors that * a server can return, so make something up. */ #define NFS_MAX_SECFLAVORS (12) /* * Value used if the user did not specify a port value.
*/ #define NFS_UNSPEC_PORT (-1) #define NFS_UNSPEC_RETRANS (UINT_MAX) #define NFS_UNSPEC_TIMEO (UINT_MAX) struct nfs_client_initdata { unsigned long init_flags; const char *hostname; /* Hostname of the server */ const struct sockaddr_storage *addr; /* Address of the server */ const char *nodename; /* Hostname of the client */ const char *ip_addr; /* IP address of the client */ size_t addrlen; struct nfs_subversion *nfs_mod; int proto; u32 minorversion; unsigned int nconnect; unsigned int max_connect; struct net *net; const struct rpc_timeout *timeparms; const struct cred *cred; struct xprtsec_parms xprtsec; unsigned long connect_timeout; unsigned long reconnect_timeout; }; /* * In-kernel mount arguments */ struct nfs_fs_context { bool internal; bool skip_reconfig_option_check; bool need_mount; bool sloppy; unsigned int flags; /* NFS{,4}_MOUNT_* flags */ unsigned int rsize, wsize; unsigned int timeo, retrans; unsigned int acregmin, acregmax; unsigned int acdirmin, acdirmax; unsigned int namlen; unsigned int options; unsigned int bsize; struct nfs_auth_info auth_info; rpc_authflavor_t selected_flavor; struct xprtsec_parms xprtsec; char *client_address; unsigned int version; unsigned int minorversion; char *fscache_uniq; unsigned short protofamily; unsigned short mountfamily; bool has_sec_mnt_opts; int lock_status; struct { union { struct sockaddr address; struct sockaddr_storage _address; }; size_t addrlen; char *hostname; u32 version; int port; unsigned short protocol; } mount_server; struct { union { struct sockaddr address; struct sockaddr_storage _address; }; size_t addrlen; char *hostname; char *export_path; int port; unsigned short protocol; unsigned short nconnect; unsigned short max_connect; unsigned short export_path_len; } nfs_server; struct nfs_fh *mntfh; struct nfs_server *server; struct nfs_subversion *nfs_mod; /* Information for a cloned mount. */ struct nfs_clone_mount { struct super_block *sb; struct dentry *dentry; struct nfs_fattr *fattr; unsigned int inherited_bsize; } clone_data; }; enum nfs_lock_status { NFS_LOCK_NOT_SET = 0, NFS_LOCK_LOCK = 1, NFS_LOCK_NOLOCK = 2, }; #define nfs_errorf(fc, fmt, ...) ((fc)->log.log ? \ errorf(fc, fmt, ## __VA_ARGS__) : \ ({ dprintk(fmt "\n", ## __VA_ARGS__); })) #define nfs_ferrorf(fc, fac, fmt, ...) ((fc)->log.log ? \ errorf(fc, fmt, ## __VA_ARGS__) : \ ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); })) #define nfs_invalf(fc, fmt, ...) ((fc)->log.log ? \ invalf(fc, fmt, ## __VA_ARGS__) : \ ({ dprintk(fmt "\n", ## __VA_ARGS__); -EINVAL; })) #define nfs_finvalf(fc, fac, fmt, ...) ((fc)->log.log ? \ invalf(fc, fmt, ## __VA_ARGS__) : \ ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); -EINVAL; })) #define nfs_warnf(fc, fmt, ...) ((fc)->log.log ? \ warnf(fc, fmt, ## __VA_ARGS__) : \ ({ dprintk(fmt "\n", ## __VA_ARGS__); })) #define nfs_fwarnf(fc, fac, fmt, ...) ((fc)->log.log ? 
\ warnf(fc, fmt, ## __VA_ARGS__) : \ ({ dfprintk(fac, fmt "\n", ## __VA_ARGS__); })) static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc) { return fc->fs_private; } /* mount_clnt.c */ struct nfs_mount_request { struct sockaddr_storage *sap; size_t salen; char *hostname; char *dirpath; u32 version; unsigned short protocol; struct nfs_fh *fh; int noresvport; unsigned int *auth_flav_len; rpc_authflavor_t *auth_flavs; struct net *net; }; extern int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans); /* client.c */ extern const struct rpc_program nfs_program; extern void nfs_clients_init(struct net *net); extern void nfs_clients_exit(struct net *net); extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *); int nfs_create_rpc_client(struct nfs_client *, const struct nfs_client_initdata *, rpc_authflavor_t); struct nfs_client *nfs_get_client(const struct nfs_client_initdata *); int nfs_probe_server(struct nfs_server *, struct nfs_fh *); void nfs_server_insert_lists(struct nfs_server *); void nfs_server_remove_lists(struct nfs_server *); void nfs_init_timeout_values(struct rpc_timeout *to, int proto, int timeo, int retrans); int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t, rpc_authflavor_t); struct nfs_server *nfs_alloc_server(void); void nfs_server_copy_userdata(struct nfs_server *, struct nfs_server *); extern void nfs_put_client(struct nfs_client *); extern void nfs_free_client(struct nfs_client *); extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const struct sockaddr *, struct nfs4_sessionid *, u32); extern struct nfs_server *nfs_create_server(struct fs_context *); extern void nfs_server_set_init_caps(struct nfs_server *); extern struct nfs_server *nfs4_create_server(struct fs_context *); extern struct nfs_server *nfs4_create_referral_server(struct fs_context *); extern int nfs4_update_server(struct nfs_server *server, const char *hostname, struct sockaddr_storage *sap, size_t salen, struct net *net); extern void nfs_free_server(struct nfs_server *server); extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); extern bool nfs_client_init_is_complete(const struct nfs_client *clp); extern int nfs_client_init_status(const struct nfs_client *clp); extern int nfs_wait_client_init_complete(const struct nfs_client *clp); extern void nfs_mark_client_ready(struct nfs_client *clp, int state); extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, const struct sockaddr_storage *ds_addr, int ds_addrlen, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, u32 minor_version); extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, struct inode *); extern struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, const struct sockaddr_storage *ds_addr, int ds_addrlen, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans); #ifdef CONFIG_PROC_FS extern int __init nfs_fs_proc_init(void); extern void nfs_fs_proc_exit(void); extern int nfs_fs_proc_net_init(struct net *net); extern void nfs_fs_proc_net_exit(struct net *net); #else static inline int nfs_fs_proc_net_init(struct net *net) { return 0; } static inline void nfs_fs_proc_net_exit(struct net *net) { } static inline int nfs_fs_proc_init(void) { return 0; } static inline void nfs_fs_proc_exit(void) { } #endif /* callback_xdr.c */ extern const struct 
svc_version nfs4_callback_version1; extern const struct svc_version nfs4_callback_version4; /* fs_context.c */ extern struct file_system_type nfs_fs_type; /* pagelist.c */ extern int __init nfs_init_nfspagecache(void); extern void nfs_destroy_nfspagecache(void); extern int __init nfs_init_readpagecache(void); extern void nfs_destroy_readpagecache(void); extern int __init nfs_init_writepagecache(void); extern void nfs_destroy_writepagecache(void); extern int __init nfs_init_directcache(void); extern void nfs_destroy_directcache(void); extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, void (*release)(struct nfs_pgio_header *hdr)); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); int nfs_iocounter_wait(struct nfs_lock_context *l_ctx); extern const struct nfs_pageio_ops nfs_pgio_rw_ops; struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); void nfs_pgio_header_free(struct nfs_pgio_header *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, const struct cred *cred, const struct nfs_rpc_ops *rpc_ops, const struct rpc_call_ops *call_ops, int how, int flags, struct nfsd_file *localio); void nfs_free_request(struct nfs_page *req); struct nfs_pgio_mirror * nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc); static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1, const struct nfs_open_context *ctx2) { return cred_fscmp(ctx1->cred, ctx2->cred) == 0 && ctx1->state == ctx2->state; } /* nfs2xdr.c */ extern const struct rpc_procinfo nfs_procedures[]; extern int nfs2_decode_dirent(struct xdr_stream *, struct nfs_entry *, bool); /* nfs3xdr.c */ extern const struct rpc_procinfo nfs3_procedures[]; extern int nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, bool); /* nfs4xdr.c */ #if IS_ENABLED(CONFIG_NFS_V4) extern int nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, bool); #endif #ifdef CONFIG_NFS_V4_1 extern const u32 nfs41_maxread_overhead; extern const u32 nfs41_maxwrite_overhead; extern const u32 nfs41_maxgetdevinfo_overhead; #endif /* nfs4proc.c */ #if IS_ENABLED(CONFIG_NFS_V4) extern const struct rpc_procinfo nfs4_procedures[]; #endif #ifdef CONFIG_NFS_V4_SECURITY_LABEL extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags); static inline struct nfs4_label * nfs4_label_copy(struct nfs4_label *dst, struct nfs4_label *src) { if (!dst || !src) return NULL; if (src->len > NFS4_MAXLABELLEN) return NULL; dst->lfs = src->lfs; dst->pi = src->pi; dst->len = src->len; memcpy(dst->label, src->label, src->len); return dst; } static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) { if (nfs_server_capable(&nfsi->vfs_inode, NFS_CAP_SECURITY_LABEL)) nfsi->cache_validity |= NFS_INO_INVALID_LABEL; } #else static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; } static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) { } static inline struct nfs4_label * nfs4_label_copy(struct nfs4_label *dst, struct nfs4_label *src) { return NULL; } #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ /* proc.c */ void nfs_close_context(struct nfs_open_context *ctx, int is_sync); extern struct nfs_client *nfs_init_client(struct nfs_client *clp, const struct nfs_client_initdata *); /* dir.c */ extern void nfs_readdir_record_entry_cache_hit(struct inode *dir); extern void 
nfs_readdir_record_entry_cache_miss(struct inode *dir); extern unsigned long nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc); extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc); struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); void nfs_d_prune_case_insensitive_aliases(struct inode *inode); int nfs_create(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, bool); struct dentry *nfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, umode_t); int nfs_rmdir(struct inode *, struct dentry *); int nfs_unlink(struct inode *, struct dentry *); int nfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *, const char *); int nfs_link(struct dentry *, struct inode *, struct dentry *); int nfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); int nfs_rename(struct mnt_idmap *, struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); #ifdef CONFIG_NFS_V4_2 static inline __u32 nfs_access_xattr_mask(const struct nfs_server *server) { if (!(server->caps & NFS_CAP_XATTR)) return 0; return NFS4_ACCESS_XAREAD | NFS4_ACCESS_XAWRITE | NFS4_ACCESS_XALIST; } #else static inline __u32 nfs_access_xattr_mask(const struct nfs_server *server) { return 0; } #endif /* file.c */ int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); loff_t nfs_file_llseek(struct file *, loff_t, int); ssize_t nfs_file_read(struct kiocb *, struct iov_iter *); ssize_t nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); int nfs_file_mmap_prepare(struct vm_area_desc *); ssize_t nfs_file_write(struct kiocb *, struct iov_iter *); int nfs_file_release(struct inode *, struct file *); int nfs_lock(struct file *, int, struct file_lock *); int nfs_flock(struct file *, int, struct file_lock *); int nfs_check_flags(int); void nfs_truncate_last_folio(struct address_space *mapping, loff_t from, loff_t to); /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; extern struct workqueue_struct *nfslocaliod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_free_inode(struct inode *); extern int nfs_write_inode(struct inode *, struct writeback_control *); extern int nfs_drop_inode(struct inode *); extern void nfs_clear_inode(struct inode *); extern void nfs_evict_inode(struct inode *); extern void nfs_zap_acl_cache(struct inode *inode); extern void nfs_set_cache_invalid(struct inode *inode, unsigned long flags); extern bool nfs_check_cache_invalid(struct inode *, unsigned long); extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); #if IS_ENABLED(CONFIG_NFS_LOCALIO) /* localio.c */ struct nfs_local_dio { u32 mem_align; u32 offset_align; loff_t middle_offset; loff_t end_offset; ssize_t start_len; /* Length for misaligned first extent */ ssize_t middle_len; /* Length for DIO-aligned middle extent */ ssize_t end_len; /* Length for misaligned last extent */ }; extern void nfs_local_probe_async(struct nfs_client *); extern void nfs_local_probe_async_work(struct work_struct *); extern struct nfsd_file *nfs_local_open_fh(struct nfs_client *, const struct cred *, struct nfs_fh *, struct nfs_file_localio *, const fmode_t); extern int nfs_local_doio(struct nfs_client *, struct nfsd_file *, struct nfs_pgio_header *, const struct rpc_call_ops *); extern int nfs_local_commit(struct nfsd_file *, struct nfs_commit_data *, const struct rpc_call_ops *, int); 
extern bool nfs_server_is_local(const struct nfs_client *clp); #else /* CONFIG_NFS_LOCALIO */ static inline void nfs_local_probe(struct nfs_client *clp) {} static inline void nfs_local_probe_async(struct nfs_client *clp) {} static inline struct nfsd_file * nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, struct nfs_fh *fh, struct nfs_file_localio *nfl, const fmode_t mode) { return NULL; } static inline int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio, struct nfs_pgio_header *hdr, const struct rpc_call_ops *call_ops) { return -EINVAL; } static inline int nfs_local_commit(struct nfsd_file *localio, struct nfs_commit_data *data, const struct rpc_call_ops *call_ops, int how) { return -EINVAL; } static inline bool nfs_server_is_local(const struct nfs_client *clp) { return false; } #endif /* CONFIG_NFS_LOCALIO */ /* super.c */ extern const struct super_operations nfs_sops; bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t); int nfs_try_get_tree(struct fs_context *); int nfs_get_tree_common(struct fs_context *); void nfs_kill_super(struct super_block *); extern int __init register_nfs_fs(void); extern void __exit unregister_nfs_fs(void); extern bool nfs_sb_active(struct super_block *sb); extern void nfs_sb_deactive(struct super_block *sb); extern int nfs_client_for_each_server(struct nfs_client *clp, int (*fn)(struct nfs_server *, void *), void *data); #ifdef CONFIG_NFS_FSCACHE extern const struct netfs_request_ops nfs_netfs_ops; #endif /* io.c */ extern __must_check int nfs_start_io_read(struct inode *inode); extern void nfs_end_io_read(struct inode *inode); extern __must_check int nfs_start_io_write(struct inode *inode); extern void nfs_end_io_write(struct inode *inode); extern __must_check int nfs_start_io_direct(struct inode *inode); extern void nfs_end_io_direct(struct inode *inode); static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi) { return test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0; } /* Must be called with exclusively locked inode->i_rwsem */ static inline void nfs_file_block_o_direct(struct nfs_inode *nfsi) { if (test_bit(NFS_INO_ODIRECT, &nfsi->flags)) { clear_bit(NFS_INO_ODIRECT, &nfsi->flags); inode_dio_wait(&nfsi->vfs_inode); } } /* namespace.c */ #define NFS_PATH_CANONICAL 1 extern char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, unsigned flags); extern struct vfsmount *nfs_d_automount(struct path *path); int nfs_submount(struct fs_context *, struct nfs_server *); int nfs_do_submount(struct fs_context *); /* getroot.c */ extern int nfs_get_root(struct super_block *s, struct fs_context *fc); #if IS_ENABLED(CONFIG_NFS_V4) extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool); #endif struct nfs_pgio_completion_ops; /* read.c */ extern const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); extern bool nfs_read_alloc_scratch(struct nfs_pgio_header *hdr, size_t size); extern int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio, struct nfs_open_context *ctx, struct folio *folio); extern void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); /* super.c */ void nfs_umount_begin(struct super_block *); int nfs_statfs(struct dentry *, struct kstatfs *); int nfs_show_options(struct seq_file *, struct 
dentry *); int nfs_show_devname(struct seq_file *, struct dentry *); int nfs_show_path(struct seq_file *, struct dentry *); int nfs_show_stats(struct seq_file *, struct dentry *); int nfs_reconfigure(struct fs_context *); /* write.c */ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_commit_free(struct nfs_commit_data *p); extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); extern int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, const struct nfs_rpc_ops *nfs_ops, const struct rpc_call_ops *call_ops, int how, int flags, struct nfsd_file *localio); extern void nfs_init_commit(struct nfs_commit_data *data, struct list_head *head, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo); int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, struct nfs_commit_info *cinfo, int max); unsigned long nfs_reqs_to_commit(struct nfs_commit_info *); int nfs_scan_commit(struct inode *inode, struct list_head *dst, struct nfs_commit_info *cinfo); void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo, u32 ds_commit_idx); int nfs_write_need_commit(struct nfs_pgio_header *); void nfs_writeback_update_inode(struct nfs_pgio_header *hdr); int nfs_generic_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo); void nfs_retry_commit(struct list_head *page_list, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo, u32 ds_commit_idx); void nfs_commitdata_release(struct nfs_commit_data *data); void nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo); void nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst, struct nfs_commit_info *cinfo); void nfs_request_remove_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo); void nfs_init_cinfo(struct nfs_commit_info *cinfo, struct inode *inode, struct nfs_direct_req *dreq); int nfs_key_timeout_notify(struct file *filp, struct inode *inode); bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode); void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio); int nfs_filemap_write_and_wait_range(struct address_space *mapping, loff_t lstart, loff_t lend); #ifdef CONFIG_NFS_V4_1 static inline void pnfs_bucket_clear_pnfs_ds_commit_verifiers(struct pnfs_commit_bucket *buckets, unsigned int nbuckets) { unsigned int i; for (i = 0; i < nbuckets; i++) buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW; } static inline void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo) { struct pnfs_commit_array *array; rcu_read_lock(); list_for_each_entry_rcu(array, &cinfo->commits, cinfo_list) pnfs_bucket_clear_pnfs_ds_commit_verifiers(array->buckets, array->nbuckets); rcu_read_unlock(); } #else static inline void nfs_clear_pnfs_ds_commit_verifiers(struct pnfs_ds_commit_info *cinfo) { } #endif #ifdef CONFIG_MIGRATION int nfs_migrate_folio(struct address_space *, struct folio *dst, struct folio *src, enum migrate_mode); #else #define nfs_migrate_folio NULL #endif static inline int nfs_write_verifier_cmp(const struct nfs_write_verifier *v1, const struct nfs_write_verifier *v2) { return memcmp(v1->data, v2->data, sizeof(v1->data)); } static inline bool nfs_write_match_verf(const struct 
nfs_writeverf *verf, struct nfs_page *req) { return verf->committed > NFS_UNSTABLE && !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier); } static inline gfp_t nfs_io_gfp_mask(void) { gfp_t ret = current_gfp_context(GFP_KERNEL); /* For workers __GFP_NORETRY only with __GFP_IO or __GFP_FS */ if ((current->flags & PF_WQ_WORKER) && ret == GFP_KERNEL) ret |= __GFP_NORETRY | __GFP_NOWARN; return ret; } /* * Special version of should_remove_suid() that ignores capabilities. */ static inline int nfs_should_remove_suid(const struct inode *inode) { umode_t mode = inode->i_mode; int kill = 0; /* suid always must be killed */ if (unlikely(mode & S_ISUID)) kill = ATTR_KILL_SUID; /* * sgid without any exec bits is just a mandatory locking mark; leave * it alone. If some exec bits are set, it's a real sgid; kill it. */ if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) kill |= ATTR_KILL_SGID; if (unlikely(kill && S_ISREG(mode))) return kill; return 0; } /* unlink.c */ extern struct rpc_task * nfs_async_rename(struct inode *old_dir, struct inode *new_dir, struct dentry *old_dentry, struct dentry *new_dentry, void (*complete)(struct rpc_task *, struct nfs_renamedata *)); extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry); /* direct.c */ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, struct nfs_direct_req *dreq); extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset); /* nfs4proc.c */ extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct nfs_client_initdata *); extern int nfs40_walk_client_list(struct nfs_client *clp, struct nfs_client **result, const struct cred *cred); extern int nfs41_walk_client_list(struct nfs_client *clp, struct nfs_client **result, const struct cred *cred); extern void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt, void *data); static inline struct inode *nfs_igrab_and_active(struct inode *inode) { struct super_block *sb = inode->i_sb; if (sb && nfs_sb_active(sb)) { if (igrab(inode)) return inode; nfs_sb_deactive(sb); } return NULL; } static inline void nfs_iput_and_deactive(struct inode *inode) { if (inode != NULL) { struct super_block *sb = inode->i_sb; iput(inode); nfs_sb_deactive(sb); } } /* * Determine the device name as a string */ static inline char *nfs_devname(struct dentry *dentry, char *buffer, ssize_t buflen) { char *dummy; return nfs_path(&dummy, dentry, buffer, buflen, NFS_PATH_CANONICAL); } /* * Determine the actual block size (and log2 thereof) */ static inline unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp) { /* make sure blocksize is a power of two */ if ((bsize & (bsize - 1)) || nrbitsp) { unsigned char nrbits; for (nrbits = 31; nrbits && !(bsize & (1UL << nrbits)); nrbits--) ; bsize = 1UL << nrbits; if (nrbitsp) *nrbitsp = nrbits; } return bsize; } /* * Calculate the number of 512byte blocks used. */ static inline blkcnt_t nfs_calc_block_size(u64 tsize) { blkcnt_t used = (tsize + 511) >> 9; return (used > ULONG_MAX) ? 
ULONG_MAX : used; } /* * Compute and set NFS server blocksize */ static inline unsigned long nfs_block_size(unsigned long bsize, unsigned char *nrbitsp) { if (bsize < NFS_MIN_FILE_IO_SIZE) bsize = NFS_DEF_FILE_IO_SIZE; else if (bsize >= NFS_MAX_FILE_IO_SIZE) bsize = NFS_MAX_FILE_IO_SIZE; return nfs_block_bits(bsize, nrbitsp); } /* * Compute and set NFS server rsize / wsize */ static inline unsigned long nfs_io_size(unsigned long iosize, enum xprt_transports proto) { if (iosize < NFS_MIN_FILE_IO_SIZE) iosize = NFS_DEF_FILE_IO_SIZE; else if (iosize >= NFS_MAX_FILE_IO_SIZE) iosize = NFS_MAX_FILE_IO_SIZE; if (proto == XPRT_TRANSPORT_UDP || iosize < PAGE_SIZE) return nfs_block_bits(iosize, NULL); return iosize & PAGE_MASK; } /* * Determine the maximum file size for a superblock */ static inline void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) { sb->s_maxbytes = (loff_t)maxfilesize; if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) sb->s_maxbytes = MAX_LFS_FILESIZE; } /* * Record the page as unstable (an extra writeback period) and mark its * inode as dirty. */ static inline void nfs_folio_mark_unstable(struct folio *folio, struct nfs_commit_info *cinfo) { if (folio && !cinfo->dreq) { struct inode *inode = folio->mapping->host; long nr = folio_nr_pages(folio); /* This page is really still in write-back - just that the * writeback is happening on the server now. */ node_stat_mod_folio(folio, NR_WRITEBACK, nr); wb_stat_mod(&inode_to_bdi(inode)->wb, WB_WRITEBACK, nr); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } } /* * Determine the number of bytes of data the page contains */ static inline size_t nfs_folio_length(struct folio *folio) { loff_t i_size = i_size_read(folio->mapping->host); if (i_size > 0) { pgoff_t index = folio->index >> folio_order(folio); pgoff_t end_index = (i_size - 1) >> folio_shift(folio); if (index < end_index) return folio_size(folio); if (index == end_index) return offset_in_folio(folio, i_size - 1) + 1; } return 0; } /* * Convert a umode to a dirent->d_type */ static inline unsigned char nfs_umode_to_dtype(umode_t mode) { return (mode >> 12) & 15; } /* * Determine the number of pages in an array of length 'len' and * with a base offset of 'base' */ static inline unsigned int nfs_page_array_len(unsigned int base, size_t len) { return ((unsigned long)len + (unsigned long)base + PAGE_SIZE - 1) >> PAGE_SHIFT; } /* * Convert a struct timespec64 into a 64-bit change attribute * * This does approximately the same thing as timespec64_to_ns(), * but for calculation efficiency, we multiply the seconds by * 1024*1024*1024. */ static inline u64 nfs_timespec_to_change_attr(const struct timespec64 *ts) { return ((u64)ts->tv_sec << 30) + ts->tv_nsec; } static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid) { return ~crc32_le(0xFFFFFFFF, &stateid->other[0], NFS4_STATEID_OTHER_SIZE); } static inline bool nfs_current_task_exiting(void) { return (current->flags & PF_EXITING) != 0; } static inline bool nfs_error_is_fatal(int err) { switch (err) { case -ERESTARTSYS: case -EINTR: case -EACCES: case -EDQUOT: case -EFBIG: case -EIO: case -ENOSPC: case -EROFS: case -ESTALE: case -E2BIG: case -ENOMEM: case -ETIMEDOUT: return true; default: return false; } } static inline bool nfs_error_is_fatal_on_server(int err) { switch (err) { case 0: case -ERESTARTSYS: case -EINTR: case -ENOMEM: return false; } return nfs_error_is_fatal(err); } /* * Select between a default port value and a user-specified port value. 
* If a zero value is set, then autobind will be used. */ static inline void nfs_set_port(struct sockaddr_storage *sap, int *port, const unsigned short default_port) { if (*port == NFS_UNSPEC_PORT) *port = default_port; rpc_set_port((struct sockaddr *)sap, *port); } struct nfs_direct_req { struct kref kref; /* release manager */ /* I/O parameters */ struct nfs_open_context *ctx; /* file open context info */ struct nfs_lock_context *l_ctx; /* Lock context info */ struct kiocb * iocb; /* controlling i/o request */ struct inode * inode; /* target file of i/o */ /* completion state */ atomic_t io_count; /* i/os we're waiting for */ spinlock_t lock; /* protect completion state */ loff_t io_start; /* Start offset for I/O */ ssize_t count, /* bytes actually processed */ max_count, /* max expected count */ error; /* any reported error */ struct completion completion; /* wait for i/o completion */ /* commit state */ struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */ struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */ struct work_struct work; int flags; /* for write */ #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ #define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ /* for read */ #define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */ #define NFS_ODIRECT_DONE INT_MAX /* write verification failed */ };
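The size and offset helpers above (nfs_block_bits(), nfs_page_array_len(), nfs_calc_block_size()) are plain integer arithmetic, so their behaviour is easy to check outside the kernel. Below is a minimal userspace sketch that mirrors two of them; the EX_PAGE_SIZE/EX_PAGE_SHIFT constants stand in for the kernel's PAGE_SIZE/PAGE_SHIFT and are assumed to be 4096/12 purely for illustration.

#include <stdio.h>

#define EX_PAGE_SHIFT 12
#define EX_PAGE_SIZE (1UL << EX_PAGE_SHIFT)

/* Same rounding as nfs_block_bits(): largest power of two <= bsize, plus its log2. */
static unsigned long ex_block_bits(unsigned long bsize, unsigned char *nrbitsp)
{
	if ((bsize & (bsize - 1)) || nrbitsp) {
		unsigned char nrbits;

		for (nrbits = 31; nrbits && !(bsize & (1UL << nrbits)); nrbits--)
			;
		bsize = 1UL << nrbits;
		if (nrbitsp)
			*nrbitsp = nrbits;
	}
	return bsize;
}

/* Same count as nfs_page_array_len(): pages needed for len bytes starting at offset base. */
static unsigned int ex_page_array_len(unsigned int base, size_t len)
{
	return ((unsigned long)len + (unsigned long)base + EX_PAGE_SIZE - 1) >> EX_PAGE_SHIFT;
}

int main(void)
{
	unsigned char bits;

	printf("block size for 48000: %lu\n", ex_block_bits(48000, &bits));	/* 32768 */
	printf("log2: %u\n", (unsigned int)bits);				/* 15 */
	printf("pages for 8000 bytes at offset 100: %u\n",
	       ex_page_array_len(100, 8000));					/* 2 */
	return 0;
}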
// SPDX-License-Identifier: GPL-2.0-or-later /* * IPVS: Round-Robin Scheduling module * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * Peter Kese <peter.kese@ijs.si> * * Fixes/Changes: * Wensong Zhang : changed the ip_vs_rr_schedule to return dest * Julian Anastasov : fixed the NULL pointer access bug in debugging * Wensong Zhang : changed some cosmetic things for debugging * Wensong Zhang : changed for the d-linked destination list * Wensong Zhang : added the ip_vs_rr_update_svc * Wensong Zhang : added any dest with weight=0 is quiesced */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> #include <linux/kernel.h> #include <net/ip_vs.h> static int ip_vs_rr_init_svc(struct ip_vs_service *svc) { svc->sched_data = &svc->destinations; return 0; } static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest) { struct list_head *p; spin_lock_bh(&svc->sched_lock); p = (struct list_head *) svc->sched_data; /* dest is already unlinked, so p->prev is not valid but * p->next is valid, use it to reach previous entry. */ if (p == &dest->n_list) svc->sched_data = p->next->prev; spin_unlock_bh(&svc->sched_lock); return 0; } /* * Round-Robin Scheduling */ static struct ip_vs_dest * ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct list_head *p; struct ip_vs_dest *dest, *last; int pass = 0; IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); spin_lock_bh(&svc->sched_lock); p = (struct list_head *) svc->sched_data; last = dest = list_entry(p, struct ip_vs_dest, n_list); do { list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) { if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && atomic_read(&dest->weight) > 0) /* HIT */ goto out; if (dest == last) goto stop; } pass++; /* Previous dest could be unlinked, do not loop forever. * If we stay at head there is no need for 2nd pass. */ } while (pass < 2 && p != &svc->destinations); stop: spin_unlock_bh(&svc->sched_lock); ip_vs_scheduler_err(svc, "no destination available"); return NULL; out: svc->sched_data = &dest->n_list; spin_unlock_bh(&svc->sched_lock); IP_VS_DBG_BUF(6, "RR: server %s:%u " "activeconns %d refcnt %d weight %d\n", IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->activeconns), refcount_read(&dest->refcnt), atomic_read(&dest->weight)); return dest; } static struct ip_vs_scheduler ip_vs_rr_scheduler = { .name = "rr", /* name */ .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), .init_service = ip_vs_rr_init_svc, .add_dest = NULL, .del_dest = ip_vs_rr_del_dest, .schedule = ip_vs_rr_schedule, }; static int __init ip_vs_rr_init(void) { return register_ip_vs_scheduler(&ip_vs_rr_scheduler); } static void __exit ip_vs_rr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_rr_scheduler); synchronize_rcu(); } module_init(ip_vs_rr_init); module_exit(ip_vs_rr_cleanup); MODULE_DESCRIPTION("ipvs round-robin scheduler"); MODULE_LICENSE("GPL");
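The scheduling core above resumes the walk at the destination saved in svc->sched_data, skips servers that are overloaded or have weight 0, and records the new position for the next call. A minimal userspace model of that policy over a plain array is sketched below; struct ex_dest and ex_rr_pick() are invented for the example and are not part of IPVS.

#include <stdbool.h>
#include <stdio.h>

struct ex_dest {
	const char *name;
	int weight;
	bool overloaded;
};

/* Round-robin pick mirroring ip_vs_rr_schedule(): resume after *pos, skip
 * unusable entries, wrap around the table at most once. */
static int ex_rr_pick(const struct ex_dest *tbl, int n, int *pos)
{
	for (int step = 1; step <= n; step++) {
		int i = (*pos + step) % n;

		if (!tbl[i].overloaded && tbl[i].weight > 0) {
			*pos = i;	/* remember the position for the next call */
			return i;
		}
	}
	return -1;			/* no destination available */
}

int main(void)
{
	struct ex_dest tbl[] = {
		{ "rs1", 1, false },
		{ "rs2", 0, false },	/* weight 0: quiesced, skipped */
		{ "rs3", 1, true },	/* overloaded: skipped */
		{ "rs4", 1, false },
	};
	int pos = -1;

	for (int k = 0; k < 4; k++) {
		int i = ex_rr_pick(tbl, 4, &pos);

		printf("%s\n", i < 0 ? "none" : tbl[i].name);	/* rs1 rs4 rs1 rs4 */
	}
	return 0;
}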
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MMU_NOTIFIER_H #define _LINUX_MMU_NOTIFIER_H #include <linux/list.h> #include <linux/spinlock.h> #include <linux/mm_types.h> #include <linux/mmap_lock.h> #include <linux/srcu.h> #include <linux/interval_tree.h> struct mmu_notifier_subscriptions; struct mmu_notifier; struct mmu_notifier_range; struct mmu_interval_notifier; /** * enum mmu_notifier_event - reason for the mmu notifier callback * @MMU_NOTIFY_UNMAP: either munmap() that unmap the range or a mremap() that * move the range * * @MMU_NOTIFY_CLEAR: clear page table entry (many reasons for this like * madvise() or replacing a page by another one, ...).
* * @MMU_NOTIFY_PROTECTION_VMA: update is due to protection change for the range * ie using the vma access permission (vm_page_prot) to update the whole range * is enough no need to inspect changes to the CPU page table (mprotect() * syscall) * * @MMU_NOTIFY_PROTECTION_PAGE: update is due to change in read/write flag for * pages in the range so to mirror those changes the user must inspect the CPU * page table (from the end callback). * * @MMU_NOTIFY_SOFT_DIRTY: soft dirty accounting (still same page and same * access flags). User should soft dirty the page in the end callback to make * sure that anyone relying on soft dirtiness catch pages that might be written * through non CPU mappings. * * @MMU_NOTIFY_RELEASE: used during mmu_interval_notifier invalidate to signal * that the mm refcount is zero and the range is no longer accessible. * * @MMU_NOTIFY_MIGRATE: used during migrate_vma_collect() invalidate to signal * a device driver to possibly ignore the invalidation if the * owner field matches the driver's device private pgmap owner. * * @MMU_NOTIFY_EXCLUSIVE: conversion of a page table entry to device-exclusive. * The owner is initialized to the value provided by the caller of * make_device_exclusive(), such that this caller can filter out these * events. */ enum mmu_notifier_event { MMU_NOTIFY_UNMAP = 0, MMU_NOTIFY_CLEAR, MMU_NOTIFY_PROTECTION_VMA, MMU_NOTIFY_PROTECTION_PAGE, MMU_NOTIFY_SOFT_DIRTY, MMU_NOTIFY_RELEASE, MMU_NOTIFY_MIGRATE, MMU_NOTIFY_EXCLUSIVE, }; #define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0) struct mmu_notifier_ops { /* * Called either by mmu_notifier_unregister or when the mm is * being destroyed by exit_mmap, always before all pages are * freed. This can run concurrently with other mmu notifier * methods (the ones invoked outside the mm context) and it * should tear down all secondary mmu mappings and freeze the * secondary mmu. If this method isn't implemented you've to * be sure that nothing could possibly write to the pages * through the secondary mmu by the time the last thread with * tsk->mm == mm exits. * * As side note: the pages freed after ->release returns could * be immediately reallocated by the gart at an alias physical * address with a different cache model, so if ->release isn't * implemented because all _software_ driven memory accesses * through the secondary mmu are terminated by the time the * last thread of this mm quits, you've also to be sure that * speculative _hardware_ operations can't allocate dirty * cachelines in the cpu that could not be snooped and made * coherent with the other read and write operations happening * through the gart alias address, so leading to memory * corruption. */ void (*release)(struct mmu_notifier *subscription, struct mm_struct *mm); /* * clear_flush_young is called after the VM is * test-and-clearing the young/accessed bitflag in the * pte. This way the VM will provide proper aging to the * accesses to the page through the secondary MMUs and not * only to the ones through the Linux pte. * Start-end is necessary in case the secondary MMU is mapping the page * at a smaller granularity than the primary MMU. */ int (*clear_flush_young)(struct mmu_notifier *subscription, struct mm_struct *mm, unsigned long start, unsigned long end); /* * clear_young is a lightweight version of clear_flush_young. Like the * latter, it is supposed to test-and-clear the young/accessed bitflag * in the secondary pte, but it may omit flushing the secondary tlb. 
*/ int (*clear_young)(struct mmu_notifier *subscription, struct mm_struct *mm, unsigned long start, unsigned long end); /* * test_young is called to check the young/accessed bitflag in * the secondary pte. This is used to know if the page is * frequently used without actually clearing the flag or tearing * down the secondary mapping on the page. */ int (*test_young)(struct mmu_notifier *subscription, struct mm_struct *mm, unsigned long address); /* * invalidate_range_start() and invalidate_range_end() must be * paired and are called only when the mmap_lock and/or the * locks protecting the reverse maps are held. If the subsystem * can't guarantee that no additional references are taken to * the pages in the range, it has to implement the * invalidate_range() notifier to remove any references taken * after invalidate_range_start(). * * Invalidation of multiple concurrent ranges may be * optionally permitted by the driver. Either way the * establishment of sptes is forbidden in the range passed to * invalidate_range_begin/end for the whole duration of the * invalidate_range_begin/end critical section. * * invalidate_range_start() is called when all pages in the * range are still mapped and have at least a refcount of one. * * invalidate_range_end() is called when all pages in the * range have been unmapped and the pages have been freed by * the VM. * * The VM will remove the page table entries and potentially * the page between invalidate_range_start() and * invalidate_range_end(). If the page must not be freed * because of pending I/O or other circumstances then the * invalidate_range_start() callback (or the initial mapping * by the driver) must make sure that the refcount is kept * elevated. * * If the driver increases the refcount when the pages are * initially mapped into an address space then either * invalidate_range_start() or invalidate_range_end() may * decrease the refcount. If the refcount is decreased on * invalidate_range_start() then the VM can free pages as page * table entries are removed. If the refcount is only * dropped on invalidate_range_end() then the driver itself * will drop the last refcount but it must take care to flush * any secondary tlb before doing the final free on the * page. Pages will no longer be referenced by the linux * address space but may still be referenced by sptes until * the last refcount is dropped. * * If blockable argument is set to false then the callback cannot * sleep and has to return with -EAGAIN if sleeping would be required. * 0 should be returned otherwise. Please note that notifiers that can * fail invalidate_range_start are not allowed to implement * invalidate_range_end, as there is no mechanism for informing the * notifier that its start failed. */ int (*invalidate_range_start)(struct mmu_notifier *subscription, const struct mmu_notifier_range *range); void (*invalidate_range_end)(struct mmu_notifier *subscription, const struct mmu_notifier_range *range); /* * arch_invalidate_secondary_tlbs() is used to manage a non-CPU TLB * which shares page-tables with the CPU. The * invalidate_range_start()/end() callbacks should not be implemented as * invalidate_secondary_tlbs() already catches the points in time when * an external TLB needs to be flushed. * * This requires arch_invalidate_secondary_tlbs() to be called while * holding the ptl spin-lock and therefore this callback is not allowed * to sleep. * * This is called by architecture code whenever invalidating a TLB * entry. 
It is assumed that any secondary TLB has the same rules for * when invalidations are required. If this is not the case architecture * code will need to call this explicitly when required for secondary * TLB invalidation. */ void (*arch_invalidate_secondary_tlbs)( struct mmu_notifier *subscription, struct mm_struct *mm, unsigned long start, unsigned long end); /* * These callbacks are used with the get/put interface to manage the * lifetime of the mmu_notifier memory. alloc_notifier() returns a new * notifier for use with the mm. * * free_notifier() is only called after the mmu_notifier has been * fully put, calls to any ops callback are prevented and no ops * callbacks are currently running. It is called from a SRCU callback * and cannot sleep. */ struct mmu_notifier *(*alloc_notifier)(struct mm_struct *mm); void (*free_notifier)(struct mmu_notifier *subscription); }; /* * The notifier chains are protected by mmap_lock and/or the reverse map * semaphores. Notifier chains are only changed when all reverse maps and * the mmap_lock locks are taken. * * Therefore notifier chains can only be traversed when either * * 1. mmap_lock is held. * 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem). * 3. No other concurrent thread can access the list (release) */ struct mmu_notifier { struct hlist_node hlist; const struct mmu_notifier_ops *ops; struct mm_struct *mm; struct rcu_head rcu; unsigned int users; }; /** * struct mmu_interval_notifier_ops * @invalidate: Upon return the caller must stop using any SPTEs within this * range. This function can sleep. Return false only if sleeping * was required but mmu_notifier_range_blockable(range) is false. */ struct mmu_interval_notifier_ops { bool (*invalidate)(struct mmu_interval_notifier *interval_sub, const struct mmu_notifier_range *range, unsigned long cur_seq); }; struct mmu_interval_notifier { struct interval_tree_node interval_tree; const struct mmu_interval_notifier_ops *ops; struct mm_struct *mm; struct hlist_node deferred_item; unsigned long invalidate_seq; }; #ifdef CONFIG_MMU_NOTIFIER #ifdef CONFIG_LOCKDEP extern struct lockdep_map __mmu_notifier_invalidate_range_start_map; #endif struct mmu_notifier_range { struct mm_struct *mm; unsigned long start; unsigned long end; unsigned flags; enum mmu_notifier_event event; void *owner; }; static inline int mm_has_notifiers(struct mm_struct *mm) { return unlikely(mm->notifier_subscriptions); } struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops, struct mm_struct *mm); static inline struct mmu_notifier * mmu_notifier_get(const struct mmu_notifier_ops *ops, struct mm_struct *mm) { struct mmu_notifier *ret; mmap_write_lock(mm); ret = mmu_notifier_get_locked(ops, mm); mmap_write_unlock(mm); return ret; } void mmu_notifier_put(struct mmu_notifier *subscription); void mmu_notifier_synchronize(void); extern int mmu_notifier_register(struct mmu_notifier *subscription, struct mm_struct *mm); extern int __mmu_notifier_register(struct mmu_notifier *subscription, struct mm_struct *mm); extern void mmu_notifier_unregister(struct mmu_notifier *subscription, struct mm_struct *mm); unsigned long mmu_interval_read_begin(struct mmu_interval_notifier *interval_sub); int mmu_interval_notifier_insert(struct mmu_interval_notifier *interval_sub, struct mm_struct *mm, unsigned long start, unsigned long length, const struct mmu_interval_notifier_ops *ops); int mmu_interval_notifier_insert_locked( struct mmu_interval_notifier *interval_sub, struct mm_struct *mm, unsigned long 
start, unsigned long length, const struct mmu_interval_notifier_ops *ops); void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub); /** * mmu_interval_set_seq - Save the invalidation sequence * @interval_sub - The subscription passed to invalidate * @cur_seq - The cur_seq passed to the invalidate() callback * * This must be called unconditionally from the invalidate callback of a * struct mmu_interval_notifier_ops under the same lock that is used to call * mmu_interval_read_retry(). It updates the sequence number for later use by * mmu_interval_read_retry(). The provided cur_seq will always be odd. * * If the caller does not call mmu_interval_read_begin() or * mmu_interval_read_retry() then this call is not required. */ static inline void mmu_interval_set_seq(struct mmu_interval_notifier *interval_sub, unsigned long cur_seq) { WRITE_ONCE(interval_sub->invalidate_seq, cur_seq); } /** * mmu_interval_read_retry - End a read side critical section against a VA range * interval_sub: The subscription * seq: The return of the paired mmu_interval_read_begin() * * This MUST be called under a user provided lock that is also held * unconditionally by op->invalidate() when it calls mmu_interval_set_seq(). * * Each call should be paired with a single mmu_interval_read_begin() and * should be used to conclude the read side. * * Returns true if an invalidation collided with this critical section, and * the caller should retry. */ static inline bool mmu_interval_read_retry(struct mmu_interval_notifier *interval_sub, unsigned long seq) { return interval_sub->invalidate_seq != seq; } /** * mmu_interval_check_retry - Test if a collision has occurred * interval_sub: The subscription * seq: The return of the matching mmu_interval_read_begin() * * This can be used in the critical section between mmu_interval_read_begin() * and mmu_interval_read_retry(). A return of true indicates an invalidation * has collided with this critical region and a future * mmu_interval_read_retry() will return true. * * False is not reliable and only suggests a collision may not have * occurred. It can be called many times and does not have to hold the user * provided lock. * * This call can be used as part of loops and other expensive operations to * expedite a retry. 
*/ static inline bool mmu_interval_check_retry(struct mmu_interval_notifier *interval_sub, unsigned long seq) { /* Pairs with the WRITE_ONCE in mmu_interval_set_seq() */ return READ_ONCE(interval_sub->invalidate_seq) != seq; } extern void __mmu_notifier_subscriptions_destroy(struct mm_struct *mm); extern void __mmu_notifier_release(struct mm_struct *mm); extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, unsigned long start, unsigned long end); extern int __mmu_notifier_clear_young(struct mm_struct *mm, unsigned long start, unsigned long end); extern int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address); extern int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *r); extern void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *r); extern void __mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm, unsigned long start, unsigned long end); extern bool mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range); static inline bool mmu_notifier_range_blockable(const struct mmu_notifier_range *range) { return (range->flags & MMU_NOTIFIER_RANGE_BLOCKABLE); } static inline void mmu_notifier_release(struct mm_struct *mm) { if (mm_has_notifiers(mm)) __mmu_notifier_release(mm); } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, unsigned long start, unsigned long end) { if (mm_has_notifiers(mm)) return __mmu_notifier_clear_flush_young(mm, start, end); return 0; } static inline int mmu_notifier_clear_young(struct mm_struct *mm, unsigned long start, unsigned long end) { if (mm_has_notifiers(mm)) return __mmu_notifier_clear_young(mm, start, end); return 0; } static inline int mmu_notifier_test_young(struct mm_struct *mm, unsigned long address) { if (mm_has_notifiers(mm)) return __mmu_notifier_test_young(mm, address); return 0; } static inline void mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) { might_sleep(); lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); if (mm_has_notifiers(range->mm)) { range->flags |= MMU_NOTIFIER_RANGE_BLOCKABLE; __mmu_notifier_invalidate_range_start(range); } lock_map_release(&__mmu_notifier_invalidate_range_start_map); } /* * This version of mmu_notifier_invalidate_range_start() avoids blocking, but it * can return an error if a notifier can't proceed without blocking, in which * case you're not allowed to modify PTEs in the specified range. * * This is mainly intended for OOM handling. 
*/ static inline int __must_check mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range) { int ret = 0; lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); if (mm_has_notifiers(range->mm)) { range->flags &= ~MMU_NOTIFIER_RANGE_BLOCKABLE; ret = __mmu_notifier_invalidate_range_start(range); } lock_map_release(&__mmu_notifier_invalidate_range_start_map); return ret; } static inline void mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range) { if (mmu_notifier_range_blockable(range)) might_sleep(); if (mm_has_notifiers(range->mm)) __mmu_notifier_invalidate_range_end(range); } static inline void mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm, unsigned long start, unsigned long end) { if (mm_has_notifiers(mm)) __mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end); } static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm) { mm->notifier_subscriptions = NULL; } static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) { if (mm_has_notifiers(mm)) __mmu_notifier_subscriptions_destroy(mm); } static inline void mmu_notifier_range_init(struct mmu_notifier_range *range, enum mmu_notifier_event event, unsigned flags, struct mm_struct *mm, unsigned long start, unsigned long end) { range->event = event; range->mm = mm; range->start = start; range->end = end; range->flags = flags; } static inline void mmu_notifier_range_init_owner( struct mmu_notifier_range *range, enum mmu_notifier_event event, unsigned int flags, struct mm_struct *mm, unsigned long start, unsigned long end, void *owner) { mmu_notifier_range_init(range, event, flags, mm, start, end); range->owner = owner; } #define ptep_clear_flush_young_notify(__vma, __address, __ptep) \ ({ \ int __young; \ struct vm_area_struct *___vma = __vma; \ unsigned long ___address = __address; \ __young = ptep_clear_flush_young(___vma, ___address, __ptep); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ ___address, \ ___address + \ PAGE_SIZE); \ __young; \ }) #define pmdp_clear_flush_young_notify(__vma, __address, __pmdp) \ ({ \ int __young; \ struct vm_area_struct *___vma = __vma; \ unsigned long ___address = __address; \ __young = pmdp_clear_flush_young(___vma, ___address, __pmdp); \ __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ ___address, \ ___address + \ PMD_SIZE); \ __young; \ }) #define ptep_clear_young_notify(__vma, __address, __ptep) \ ({ \ int __young; \ struct vm_area_struct *___vma = __vma; \ unsigned long ___address = __address; \ __young = ptep_test_and_clear_young(___vma, ___address, __ptep);\ __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address, \ ___address + PAGE_SIZE); \ __young; \ }) #define pmdp_clear_young_notify(__vma, __address, __pmdp) \ ({ \ int __young; \ struct vm_area_struct *___vma = __vma; \ unsigned long ___address = __address; \ __young = pmdp_test_and_clear_young(___vma, ___address, __pmdp);\ __young |= mmu_notifier_clear_young(___vma->vm_mm, ___address, \ ___address + PMD_SIZE); \ __young; \ }) #else /* CONFIG_MMU_NOTIFIER */ struct mmu_notifier_range { unsigned long start; unsigned long end; }; static inline void _mmu_notifier_range_init(struct mmu_notifier_range *range, unsigned long start, unsigned long end) { range->start = start; range->end = end; } #define mmu_notifier_range_init(range,event,flags,mm,start,end) \ _mmu_notifier_range_init(range, start, end) #define mmu_notifier_range_init_owner(range, event, flags, mm, start, \ end, owner) \ _mmu_notifier_range_init(range, start, end) 
static inline bool mmu_notifier_range_blockable(const struct mmu_notifier_range *range) { return true; } static inline int mm_has_notifiers(struct mm_struct *mm) { return 0; } static inline void mmu_notifier_release(struct mm_struct *mm) { } static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, unsigned long start, unsigned long end) { return 0; } static inline int mmu_notifier_clear_young(struct mm_struct *mm, unsigned long start, unsigned long end) { return 0; } static inline int mmu_notifier_test_young(struct mm_struct *mm, unsigned long address) { return 0; } static inline void mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) { } static inline int mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range) { return 0; } static inline void mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range) { } static inline void mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm, unsigned long start, unsigned long end) { } static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm) { } static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) { } #define mmu_notifier_range_update_to_read_only(r) false #define ptep_clear_flush_young_notify ptep_clear_flush_young #define pmdp_clear_flush_young_notify pmdp_clear_flush_young #define ptep_clear_young_notify ptep_test_and_clear_young #define pmdp_clear_young_notify pmdp_test_and_clear_young static inline void mmu_notifier_synchronize(void) { } #endif /* CONFIG_MMU_NOTIFIER */ #endif /* _LINUX_MMU_NOTIFIER_H */
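The interval-notifier comments above describe a seqcount-style collision protocol: the invalidate callback records the sequence with mmu_interval_set_seq() under a driver lock, and the page-fault side brackets its work with mmu_interval_read_begin()/mmu_interval_read_retry() under the same lock. The sketch below shows one plausible shape of that pattern; only the mmu_interval_* and mmu_notifier_range_blockable() calls are taken from the declarations above, while struct ex_driver, its spinlock and the elided SPTE handling are hypothetical.

#include <linux/mmu_notifier.h>
#include <linux/spinlock.h>

/* Hypothetical per-range driver state; only the mmu_interval_* calls are real API. */
struct ex_driver {
	spinlock_t lock;			/* also taken by ex_invalidate() */
	struct mmu_interval_notifier notifier;
};

static bool ex_invalidate(struct mmu_interval_notifier *sub,
			  const struct mmu_notifier_range *range,
			  unsigned long cur_seq)
{
	struct ex_driver *drv = container_of(sub, struct ex_driver, notifier);

	/* A driver that would have to sleep here must check
	 * mmu_notifier_range_blockable(range) and return false when it may not. */
	spin_lock(&drv->lock);
	mmu_interval_set_seq(sub, cur_seq);	/* under the same lock as read_retry() */
	/* ... tear down SPTEs covering range->start .. range->end ... */
	spin_unlock(&drv->lock);
	return true;
}

static const struct mmu_interval_notifier_ops ex_ops = {
	.invalidate = ex_invalidate,
};

static int ex_register(struct ex_driver *drv, struct mm_struct *mm,
		       unsigned long start, unsigned long length)
{
	spin_lock_init(&drv->lock);
	return mmu_interval_notifier_insert(&drv->notifier, mm, start, length,
					    &ex_ops);
}

/* Fault path: retry until no invalidation collides with SPTE installation. */
static int ex_fault(struct ex_driver *drv, unsigned long addr)
{
	unsigned long seq;

	for (;;) {
		seq = mmu_interval_read_begin(&drv->notifier);
		/* ... look up the backing page for addr outside the lock ... */
		spin_lock(&drv->lock);
		if (mmu_interval_read_retry(&drv->notifier, seq)) {
			spin_unlock(&drv->lock);
			continue;	/* collided with an invalidation, start over */
		}
		/* ... install the SPTE while the lock is still held ... */
		spin_unlock(&drv->lock);
		return 0;
	}
}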
// SPDX-License-Identifier: GPL-2.0-only /* * Common interrupt code for 32 and 64 bit */ #include <linux/cpu.h> #include <linux/interrupt.h> #include <linux/kernel_stat.h> #include <linux/of.h> #include <linux/seq_file.h> #include <linux/smp.h> #include <linux/ftrace.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/irq.h> #include <linux/kvm_types.h> #include <asm/irq_stack.h> #include <asm/apic.h> #include <asm/io_apic.h> #include <asm/irq.h> #include <asm/mce.h> #include <asm/hw_irq.h> #include <asm/desc.h> #include <asm/traps.h> #include <asm/thermal.h> #include <asm/posted_intr.h> #include <asm/irq_remapping.h> #if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_THERMAL_VECTOR) #define CREATE_TRACE_POINTS #include <asm/trace/irq_vectors.h> #endif DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); DEFINE_PER_CPU_CACHE_HOT(u16, __softirq_pending); EXPORT_PER_CPU_SYMBOL(__softirq_pending); DEFINE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr); atomic_t irq_err_count; /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. */ void ack_bad_irq(unsigned int irq) { if (printk_ratelimit()) pr_err("unexpected IRQ trap at vector %02x\n", irq); /* * Currently unexpected vectors happen only on SMP and APIC.
* We _must_ ack these because every local APIC has only N * irq slots per priority level, and a 'hanging, unacked' IRQ * holds up an irq slot - in excessive cases (when multiple * unexpected vectors occur) that might lock up the APIC * completely. * But only ack when the APIC is enabled -AK */ apic_eoi(); } #define irq_stats(x) (&per_cpu(irq_stat, x)) /* * /proc/interrupts printing for arch specific interrupts */ int arch_show_interrupts(struct seq_file *p, int prec) { int j; seq_printf(p, "%*s: ", prec, "NMI"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->__nmi_count); seq_puts(p, " Non-maskable interrupts\n"); #ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "%*s: ", prec, "LOC"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); seq_puts(p, " Local timer interrupts\n"); seq_printf(p, "%*s: ", prec, "SPU"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); seq_puts(p, " Spurious interrupts\n"); seq_printf(p, "%*s: ", prec, "PMI"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); seq_puts(p, " Performance monitoring interrupts\n"); seq_printf(p, "%*s: ", prec, "IWI"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs); seq_puts(p, " IRQ work interrupts\n"); seq_printf(p, "%*s: ", prec, "RTR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count); seq_puts(p, " APIC ICR read retries\n"); if (x86_platform_ipi_callback) { seq_printf(p, "%*s: ", prec, "PLT"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis); seq_puts(p, " Platform interrupts\n"); } #endif #ifdef CONFIG_SMP seq_printf(p, "%*s: ", prec, "RES"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); seq_puts(p, " Rescheduling interrupts\n"); seq_printf(p, "%*s: ", prec, "CAL"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); seq_puts(p, " Function call interrupts\n"); seq_printf(p, "%*s: ", prec, "TLB"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); seq_puts(p, " TLB shootdowns\n"); #endif #ifdef CONFIG_X86_THERMAL_VECTOR seq_printf(p, "%*s: ", prec, "TRM"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); seq_puts(p, " Thermal event interrupts\n"); #endif #ifdef CONFIG_X86_MCE_THRESHOLD seq_printf(p, "%*s: ", prec, "THR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); seq_puts(p, " Threshold APIC interrupts\n"); #endif #ifdef CONFIG_X86_MCE_AMD seq_printf(p, "%*s: ", prec, "DFR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count); seq_puts(p, " Deferred Error APIC interrupts\n"); #endif #ifdef CONFIG_X86_MCE seq_printf(p, "%*s: ", prec, "MCE"); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); seq_puts(p, " Machine check exceptions\n"); seq_printf(p, "%*s: ", prec, "MCP"); for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); seq_puts(p, " Machine check polls\n"); #endif #ifdef CONFIG_X86_HV_CALLBACK_VECTOR if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) { seq_printf(p, "%*s: ", prec, "HYP"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count); seq_puts(p, " Hypervisor callback interrupts\n"); } #endif #if IS_ENABLED(CONFIG_HYPERV) if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) { seq_printf(p, "%*s: ", prec, "HRE"); for_each_online_cpu(j) seq_printf(p, "%10u 
", irq_stats(j)->irq_hv_reenlightenment_count); seq_puts(p, " Hyper-V reenlightenment interrupts\n"); } if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) { seq_printf(p, "%*s: ", prec, "HVS"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->hyperv_stimer0_count); seq_puts(p, " Hyper-V stimer0 interrupts\n"); } #endif seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); #if defined(CONFIG_X86_IO_APIC) seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); #endif #if IS_ENABLED(CONFIG_KVM) seq_printf(p, "%*s: ", prec, "PIN"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis); seq_puts(p, " Posted-interrupt notification event\n"); seq_printf(p, "%*s: ", prec, "NPI"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_nested_ipis); seq_puts(p, " Nested posted-interrupt event\n"); seq_printf(p, "%*s: ", prec, "PIW"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_wakeup_ipis); seq_puts(p, " Posted-interrupt wakeup event\n"); #endif #ifdef CONFIG_X86_POSTED_MSI seq_printf(p, "%*s: ", prec, "PMN"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->posted_msi_notification_count); seq_puts(p, " Posted MSI notification event\n"); #endif return 0; } /* * /proc/stat helpers */ u64 arch_irq_stat_cpu(unsigned int cpu) { u64 sum = irq_stats(cpu)->__nmi_count; #ifdef CONFIG_X86_LOCAL_APIC sum += irq_stats(cpu)->apic_timer_irqs; sum += irq_stats(cpu)->irq_spurious_count; sum += irq_stats(cpu)->apic_perf_irqs; sum += irq_stats(cpu)->apic_irq_work_irqs; sum += irq_stats(cpu)->icr_read_retry_count; if (x86_platform_ipi_callback) sum += irq_stats(cpu)->x86_platform_ipis; #endif #ifdef CONFIG_SMP sum += irq_stats(cpu)->irq_resched_count; sum += irq_stats(cpu)->irq_call_count; #endif #ifdef CONFIG_X86_THERMAL_VECTOR sum += irq_stats(cpu)->irq_thermal_count; #endif #ifdef CONFIG_X86_MCE_THRESHOLD sum += irq_stats(cpu)->irq_threshold_count; #endif #ifdef CONFIG_X86_HV_CALLBACK_VECTOR sum += irq_stats(cpu)->irq_hv_callback_count; #endif #if IS_ENABLED(CONFIG_HYPERV) sum += irq_stats(cpu)->irq_hv_reenlightenment_count; sum += irq_stats(cpu)->hyperv_stimer0_count; #endif #ifdef CONFIG_X86_MCE sum += per_cpu(mce_exception_count, cpu); sum += per_cpu(mce_poll_count, cpu); #endif return sum; } u64 arch_irq_stat(void) { u64 sum = atomic_read(&irq_err_count); return sum; } static __always_inline void handle_irq(struct irq_desc *desc, struct pt_regs *regs) { if (IS_ENABLED(CONFIG_X86_64)) generic_handle_irq_desc(desc); else __handle_irq(desc, regs); } static struct irq_desc *reevaluate_vector(int vector) { struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); if (!IS_ERR_OR_NULL(desc)) return desc; if (desc == VECTOR_UNUSED) pr_emerg_ratelimited("No irq handler for %d.%u\n", smp_processor_id(), vector); else __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); return NULL; } static __always_inline bool call_irq_handler(int vector, struct pt_regs *regs) { struct irq_desc *desc = __this_cpu_read(vector_irq[vector]); if (likely(!IS_ERR_OR_NULL(desc))) { handle_irq(desc, regs); return true; } /* * Reevaluate with vector_lock held to prevent a race against * request_irq() setting up the vector: * * CPU0 CPU1 * interrupt is raised in APIC IRR * but not handled * free_irq() * per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN; * * request_irq() common_interrupt() * d = this_cpu_read(vector_irq[vector]); * * per_cpu(vector_irq, CPU1)[vector] = desc; * * if (d == VECTOR_SHUTDOWN) 
* this_cpu_write(vector_irq[vector], VECTOR_UNUSED); * * This requires that the same vector on the same target CPU is * handed out or that a spurious interrupt hits that CPU/vector. */ lock_vector_lock(); desc = reevaluate_vector(vector); unlock_vector_lock(); if (!desc) return false; handle_irq(desc, regs); return true; } /* * common_interrupt() handles all normal device IRQ's (the special SMP * cross-CPU interrupts have their own entry points). */ DEFINE_IDTENTRY_IRQ(common_interrupt) { struct pt_regs *old_regs = set_irq_regs(regs); /* entry code tells RCU that we're not quiescent. Check it. */ RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU"); if (unlikely(!call_irq_handler(vector, regs))) apic_eoi(); set_irq_regs(old_regs); } #ifdef CONFIG_X86_LOCAL_APIC /* Function pointer for generic interrupt vector handling */ void (*x86_platform_ipi_callback)(void) = NULL; /* * Handler for X86_PLATFORM_IPI_VECTOR. */ DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi) { struct pt_regs *old_regs = set_irq_regs(regs); apic_eoi(); trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR); inc_irq_stat(x86_platform_ipis); if (x86_platform_ipi_callback) x86_platform_ipi_callback(); trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR); set_irq_regs(old_regs); } #endif #if IS_ENABLED(CONFIG_KVM) static void dummy_handler(void) {} static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler; void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)) { if (handler) kvm_posted_intr_wakeup_handler = handler; else { kvm_posted_intr_wakeup_handler = dummy_handler; synchronize_rcu(); } } EXPORT_SYMBOL_FOR_KVM(kvm_set_posted_intr_wakeup_handler); /* * Handler for POSTED_INTERRUPT_VECTOR. */ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi) { apic_eoi(); inc_irq_stat(kvm_posted_intr_ipis); } /* * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. */ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi) { apic_eoi(); inc_irq_stat(kvm_posted_intr_wakeup_ipis); kvm_posted_intr_wakeup_handler(); } /* * Handler for POSTED_INTERRUPT_NESTED_VECTOR. */ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi) { apic_eoi(); inc_irq_stat(kvm_posted_intr_nested_ipis); } #endif #ifdef CONFIG_X86_POSTED_MSI /* Posted Interrupt Descriptors for coalesced MSIs to be posted */ DEFINE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc); void intel_posted_msi_init(void) { u32 destination; u32 apic_id; this_cpu_write(posted_msi_pi_desc.nv, POSTED_MSI_NOTIFICATION_VECTOR); /* * APIC destination ID is stored in bit 8:15 while in XAPIC mode. * VT-d spec. CH 9.11 */ apic_id = this_cpu_read(x86_cpu_to_apicid); destination = x2apic_enabled() ? apic_id : apic_id << 8; this_cpu_write(posted_msi_pi_desc.ndst, destination); } static __always_inline bool handle_pending_pir(unsigned long *pir, struct pt_regs *regs) { unsigned long pir_copy[NR_PIR_WORDS]; int vec = FIRST_EXTERNAL_VECTOR; if (!pi_harvest_pir(pir, pir_copy)) return false; for_each_set_bit_from(vec, pir_copy, FIRST_SYSTEM_VECTOR) call_irq_handler(vec, regs); return true; } /* * Performance data shows that 3 is good enough to harvest 90+% of the benefit * on high IRQ rate workload. */ #define MAX_POSTED_MSI_COALESCING_LOOP 3 /* * For MSIs that are delivered as posted interrupts, the CPU notifications * can be coalesced if the MSIs arrive in high frequency bursts. 
*/ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi_notification) { struct pt_regs *old_regs = set_irq_regs(regs); struct pi_desc *pid; int i = 0; pid = this_cpu_ptr(&posted_msi_pi_desc); inc_irq_stat(posted_msi_notification_count); irq_enter(); /* * Max coalescing count includes the extra round of handle_pending_pir * after clearing the outstanding notification bit. Hence, at most * MAX_POSTED_MSI_COALESCING_LOOP - 1 loops are executed here. */ while (++i < MAX_POSTED_MSI_COALESCING_LOOP) { if (!handle_pending_pir(pid->pir, regs)) break; } /* * Clear outstanding notification bit to allow new IRQ notifications, * do this last to maximize the window of interrupt coalescing. */ pi_clear_on(pid); /* * There could be a race of PI notification and the clearing of ON bit, * process PIR bits one last time such that handling the new interrupts * are not delayed until the next IRQ. */ handle_pending_pir(pid->pir, regs); apic_eoi(); irq_exit(); set_irq_regs(old_regs); } #endif /* X86_POSTED_MSI */ #ifdef CONFIG_HOTPLUG_CPU /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) { unsigned int vector; struct irq_desc *desc; struct irq_data *data; struct irq_chip *chip; irq_migrate_all_off_this_cpu(); /* * We can remove mdelay() and then send spurious interrupts to * new cpu targets for all the irqs that were handled previously by * this cpu. While it works, I have seen spurious interrupt messages * (nothing wrong but still...). * * So for now, retain mdelay(1) and check the IRR and then send those * interrupts to new targets as this cpu is already offlined... */ mdelay(1); /* * We can walk the vector array of this cpu without holding * vector_lock because the cpu is already marked !online, so * nothing else will touch it. */ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector]))) continue; if (is_vector_pending(vector)) { desc = __this_cpu_read(vector_irq[vector]); raw_spin_lock(&desc->lock); data = irq_desc_get_irq_data(desc); chip = irq_data_get_irq_chip(data); if (chip->irq_retrigger) { chip->irq_retrigger(data); __this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED); } raw_spin_unlock(&desc->lock); } if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED) __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); } } #endif #ifdef CONFIG_X86_THERMAL_VECTOR static void smp_thermal_vector(void) { if (x86_thermal_enabled()) intel_thermal_interrupt(); else pr_err("CPU%d: Unexpected LVT thermal interrupt!\n", smp_processor_id()); } DEFINE_IDTENTRY_SYSVEC(sysvec_thermal) { trace_thermal_apic_entry(THERMAL_APIC_VECTOR); inc_irq_stat(irq_thermal_count); smp_thermal_vector(); trace_thermal_apic_exit(THERMAL_APIC_VECTOR); apic_eoi(); } #endif
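sysvec_posted_msi_notification() above drains the posted-interrupt bitmap a bounded number of times, clears the outstanding-notification bit, and then drains once more so that an interrupt posted in the race window is not left pending until the next notification. The following is a small userspace model of just that ordering; the ex_* names and the single-word bitmap are illustrative and do not reflect the kernel's PIR layout.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define EX_MAX_LOOPS 3			/* mirrors MAX_POSTED_MSI_COALESCING_LOOP */

static _Atomic unsigned long ex_pir;	/* stand-in for the posted-interrupt bitmap */
static atomic_bool ex_on = true;	/* stand-in for the outstanding-notification bit */

/* Atomically harvest and clear pending bits, dispatching each set one. */
static bool ex_handle_pending(void)
{
	unsigned long pending = atomic_exchange(&ex_pir, 0);

	if (!pending)
		return false;
	for (unsigned int v = 0; v < 8 * sizeof(pending); v++)
		if (pending & (1UL << v))
			printf("handle vector %u\n", v);
	return true;
}

int main(void)
{
	int i = 0;

	atomic_fetch_or(&ex_pir, (1UL << 3) | (1UL << 7));	/* two posted "MSIs" */

	while (++i < EX_MAX_LOOPS) {		/* bounded coalescing passes */
		if (!ex_handle_pending())
			break;
	}
	atomic_store(&ex_on, false);		/* clear ON so new posts notify again ... */
	ex_handle_pending();			/* ... then scan once more to close the race */
	return 0;
}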
// SPDX-License-Identifier: GPL-2.0-or-later /* * usbusx2y.c - ALSA USB US-428 Driver * 2005-04-14 Karsten Wiese Version 0.8.7.2: Call snd_card_free() instead of snd_card_free_in_thread() to prevent oops with dead keyboard symptom. Tested ok with kernel 2.6.12-rc2. 2004-12-14 Karsten Wiese Version 0.8.7.1: snd_pcm_open for rawusb pcm-devices now returns -EBUSY if called without rawusb's hwdep device being open. 2004-12-02 Karsten Wiese Version 0.8.7: Use macro usb_maxpacket() for portability. 2004-10-26 Karsten Wiese Version 0.8.6: wake_up() process waiting in usx2y_urbs_start() on error. 2004-10-21 Karsten Wiese Version 0.8.5: nrpacks is runtime or compiletime configurable now with tested values from 1 to 4. 2004-10-03 Karsten Wiese Version 0.8.2: Avoid any possible racing while in prepare callback. 2004-09-30 Karsten Wiese Version 0.8.0: Simplified things and made ohci work again. 2004-09-20 Karsten Wiese Version 0.7.3: Use usb_kill_urb() instead of deprecated (kernel 2.6.9) usb_unlink_urb(). 2004-07-13 Karsten Wiese Version 0.7.1: Don't sleep in START/STOP callbacks anymore. us428 channels C/D not handled just for this version, sorry. 2004-06-21 Karsten Wiese Version 0.6.4: Temporarily suspend midi input to sanely call usb_set_interface() when setting format. 2004-06-12 Karsten Wiese Version 0.6.3: Made it thus the following rule is enforced: "All pcm substreams of one usx2y have to operate at the same rate & format." 2004-04-06 Karsten Wiese Version 0.6.0: Runs on 2.6.5 kernel without any "--with-debug=" things. us224 reported running. 2004-01-14 Karsten Wiese Version 0.5.1: Runs with 2.6.1 kernel. 2003-12-30 Karsten Wiese Version 0.4.1: Fix 24Bit 4Channel capturing for the us428.
2003-11-27 Karsten Wiese, Martin Langer Version 0.4: us122 support. us224 could be tested by uncommenting the sections containing USB_ID_US224 2003-11-03 Karsten Wiese Version 0.3: 24Bit support. "arecord -D hw:1 -c 2 -r 48000 -M -f S24_3LE|aplay -D hw:1 -c 2 -r 48000 -M -f S24_3LE" works. 2003-08-22 Karsten Wiese Version 0.0.8: Removed EZUSB Firmware. First Stage Firmwaredownload is now done by tascam-firmware downloader. See: http://usb-midi-fw.sourceforge.net/tascam-firmware.tar.gz 2003-06-18 Karsten Wiese Version 0.0.5: changed to compile with kernel 2.4.21 and alsa 0.9.4 2002-10-16 Karsten Wiese Version 0.0.4: compiles again with alsa-current. USB_ISO_ASAP not used anymore (most of the time), instead urb->start_frame is calculated here now, some calls inside usb-driver don't need to happen anymore. To get the best out of this: Disable APM-support in the kernel as APM-BIOS calls (once each second) hard disable interrupt for many precious milliseconds. This helped me much on my slowish PII 400 & PIII 500. ACPI yet untested but might cause the same bad behaviour. Use a kernel with lowlatency and preemptiv patches applied. To autoload snd-usb-midi append a line post-install snd-usb-us428 modprobe snd-usb-midi to /etc/modules.conf. known problems: sliders, knobs, lights not yet handled except MASTER Volume slider. "pcm -c 2" doesn't work. "pcm -c 2 -m direct_interleaved" does. KDE3: "Enable full duplex operation" deadlocks. 2002-08-31 Karsten Wiese Version 0.0.3: audio also simplex; simplifying: iso urbs only 1 packet, melted structs. ASYNC_UNLINK not used anymore: no more crashes so far..... for alsa 0.9 rc3. 2002-08-09 Karsten Wiese Version 0.0.2: midi works with snd-usb-midi, audio (only fullduplex now) with i.e. bristol. The firmware has been sniffed from win2k us-428 driver 3.09. * Copyright (c) 2002 - 2004 Karsten Wiese */ #include <linux/init.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/usb.h> #include <sound/core.h> #include <sound/initval.h> #include <sound/pcm.h> #include <sound/rawmidi.h> #include "usx2y.h" #include "usbusx2y.h" #include "usX2Yhwdep.h" MODULE_AUTHOR("Karsten Wiese <annabellesgarden@yahoo.de>"); MODULE_DESCRIPTION("TASCAM "NAME_ALLCAPS" Version 0.8.7.2"); MODULE_LICENSE("GPL"); static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-max */ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* Id for this card */ static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP; /* Enable this card */ module_param_array(index, int, NULL, 0444); MODULE_PARM_DESC(index, "Index value for "NAME_ALLCAPS"."); module_param_array(id, charp, NULL, 0444); MODULE_PARM_DESC(id, "ID string for "NAME_ALLCAPS"."); module_param_array(enable, bool, NULL, 0444); MODULE_PARM_DESC(enable, "Enable "NAME_ALLCAPS"."); static int snd_usx2y_card_used[SNDRV_CARDS]; static void snd_usx2y_card_private_free(struct snd_card *card); static void usx2y_unlinkseq(struct snd_usx2y_async_seq *s); #ifdef USX2Y_NRPACKS_VARIABLE int nrpacks = USX2Y_NRPACKS; /* number of packets per urb */ module_param(nrpacks, int, 0444); MODULE_PARM_DESC(nrpacks, "Number of packets per URB."); #endif /* * pipe 4 is used for switching the lamps, setting samplerate, volumes .... 
*/ static void i_usx2y_out04_int(struct urb *urb) { #ifdef CONFIG_SND_DEBUG if (urb->status) { int i; struct usx2ydev *usx2y = urb->context; for (i = 0; i < 10 && usx2y->as04.urb[i] != urb; i++) ; dev_dbg(&urb->dev->dev, "%s urb %i status=%i\n", __func__, i, urb->status); } #endif } static void i_usx2y_in04_int(struct urb *urb) { int err = 0; struct usx2ydev *usx2y = urb->context; struct us428ctls_sharedmem *us428ctls = usx2y->us428ctls_sharedmem; struct us428_p4out *p4out; int i, j, n, diff, send; usx2y->in04_int_calls++; if (urb->status) { dev_dbg(&urb->dev->dev, "Interrupt Pipe 4 came back with status=%i\n", urb->status); return; } if (us428ctls) { diff = -1; if (us428ctls->ctl_snapshot_last == -2) { diff = 0; memcpy(usx2y->in04_last, usx2y->in04_buf, sizeof(usx2y->in04_last)); us428ctls->ctl_snapshot_last = -1; } else { for (i = 0; i < 21; i++) { if (usx2y->in04_last[i] != ((char *)usx2y->in04_buf)[i]) { if (diff < 0) diff = i; usx2y->in04_last[i] = ((char *)usx2y->in04_buf)[i]; } } } if (diff >= 0) { n = us428ctls->ctl_snapshot_last + 1; if (n >= N_US428_CTL_BUFS || n < 0) n = 0; memcpy(us428ctls->ctl_snapshot + n, usx2y->in04_buf, sizeof(us428ctls->ctl_snapshot[0])); us428ctls->ctl_snapshot_differs_at[n] = diff; us428ctls->ctl_snapshot_last = n; wake_up(&usx2y->us428ctls_wait_queue_head); } } if (usx2y->us04) { if (!usx2y->us04->submitted) { do { err = usb_submit_urb(usx2y->us04->urb[usx2y->us04->submitted++], GFP_ATOMIC); } while (!err && usx2y->us04->submitted < usx2y->us04->len); } } else { if (us428ctls && us428ctls->p4out_last >= 0 && us428ctls->p4out_last < N_US428_P4OUT_BUFS) { if (us428ctls->p4out_last != us428ctls->p4out_sent) { send = us428ctls->p4out_sent + 1; if (send >= N_US428_P4OUT_BUFS) send = 0; for (j = 0; j < URBS_ASYNC_SEQ && !err; ++j) { if (!usx2y->as04.urb[j]->status) { p4out = us428ctls->p4out + send; // FIXME if more than 1 p4out is new, 1 gets lost. usb_fill_bulk_urb(usx2y->as04.urb[j], usx2y->dev, usb_sndbulkpipe(usx2y->dev, 0x04), &p4out->val.vol, p4out->type == ELT_LIGHT ? 
sizeof(struct us428_lights) : 5, i_usx2y_out04_int, usx2y); err = usb_submit_urb(usx2y->as04.urb[j], GFP_ATOMIC); us428ctls->p4out_sent = send; break; } } } } } if (err) dev_err(&urb->dev->dev, "in04_int() usb_submit_urb err=%i\n", err); urb->dev = usx2y->dev; usb_submit_urb(urb, GFP_ATOMIC); } /* * Prepare some urbs */ int usx2y_async_seq04_init(struct usx2ydev *usx2y) { int err = 0, i; if (WARN_ON(usx2y->as04.buffer)) return -EBUSY; usx2y->as04.buffer = kmalloc_array(URBS_ASYNC_SEQ, URB_DATA_LEN_ASYNC_SEQ, GFP_KERNEL); if (!usx2y->as04.buffer) { err = -ENOMEM; } else { for (i = 0; i < URBS_ASYNC_SEQ; ++i) { usx2y->as04.urb[i] = usb_alloc_urb(0, GFP_KERNEL); if (!usx2y->as04.urb[i]) { err = -ENOMEM; break; } usb_fill_bulk_urb(usx2y->as04.urb[i], usx2y->dev, usb_sndbulkpipe(usx2y->dev, 0x04), usx2y->as04.buffer + URB_DATA_LEN_ASYNC_SEQ * i, 0, i_usx2y_out04_int, usx2y); err = usb_urb_ep_type_check(usx2y->as04.urb[i]); if (err < 0) break; } } if (err) usx2y_unlinkseq(&usx2y->as04); return err; } int usx2y_in04_init(struct usx2ydev *usx2y) { int err; if (WARN_ON(usx2y->in04_urb)) return -EBUSY; usx2y->in04_urb = usb_alloc_urb(0, GFP_KERNEL); if (!usx2y->in04_urb) { err = -ENOMEM; goto error; } usx2y->in04_buf = kmalloc(21, GFP_KERNEL); if (!usx2y->in04_buf) { err = -ENOMEM; goto error; } init_waitqueue_head(&usx2y->in04_wait_queue); usb_fill_int_urb(usx2y->in04_urb, usx2y->dev, usb_rcvintpipe(usx2y->dev, 0x4), usx2y->in04_buf, 21, i_usx2y_in04_int, usx2y, 10); if (usb_urb_ep_type_check(usx2y->in04_urb)) { err = -EINVAL; goto error; } return usb_submit_urb(usx2y->in04_urb, GFP_KERNEL); error: kfree(usx2y->in04_buf); usb_free_urb(usx2y->in04_urb); usx2y->in04_buf = NULL; usx2y->in04_urb = NULL; return err; } static void usx2y_unlinkseq(struct snd_usx2y_async_seq *s) { int i; for (i = 0; i < URBS_ASYNC_SEQ; ++i) { if (!s->urb[i]) continue; usb_kill_urb(s->urb[i]); usb_free_urb(s->urb[i]); s->urb[i] = NULL; } kfree(s->buffer); s->buffer = NULL; } static const struct usb_device_id snd_usx2y_usb_id_table[] = { { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x1604, .idProduct = USB_ID_US428 }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x1604, .idProduct = USB_ID_US122 }, { .match_flags = USB_DEVICE_ID_MATCH_DEVICE, .idVendor = 0x1604, .idProduct = USB_ID_US224 }, { /* terminator */ } }; MODULE_DEVICE_TABLE(usb, snd_usx2y_usb_id_table); static int usx2y_create_card(struct usb_device *device, struct usb_interface *intf, struct snd_card **cardp) { int dev; struct snd_card *card; int err; for (dev = 0; dev < SNDRV_CARDS; ++dev) if (enable[dev] && !snd_usx2y_card_used[dev]) break; if (dev >= SNDRV_CARDS) return -ENODEV; err = snd_card_new(&intf->dev, index[dev], id[dev], THIS_MODULE, sizeof(struct usx2ydev), &card); if (err < 0) return err; snd_usx2y_card_used[usx2y(card)->card_index = dev] = 1; card->private_free = snd_usx2y_card_private_free; usx2y(card)->dev = device; init_waitqueue_head(&usx2y(card)->prepare_wait_queue); init_waitqueue_head(&usx2y(card)->us428ctls_wait_queue_head); mutex_init(&usx2y(card)->pcm_mutex); INIT_LIST_HEAD(&usx2y(card)->midi_list); strscpy(card->driver, "USB "NAME_ALLCAPS""); sprintf(card->shortname, "TASCAM "NAME_ALLCAPS""); sprintf(card->longname, "%s (%x:%x if %d at %03d/%03d)", card->shortname, le16_to_cpu(device->descriptor.idVendor), le16_to_cpu(device->descriptor.idProduct), 0,//us428(card)->usbmidi.ifnum, usx2y(card)->dev->bus->busnum, usx2y(card)->dev->devnum); *cardp = card; return 0; } static void snd_usx2y_card_private_free(struct 
snd_card *card) { struct usx2ydev *usx2y = usx2y(card); kfree(usx2y->in04_buf); usb_free_urb(usx2y->in04_urb); if (usx2y->us428ctls_sharedmem) free_pages_exact(usx2y->us428ctls_sharedmem, US428_SHAREDMEM_PAGES); if (usx2y->card_index >= 0 && usx2y->card_index < SNDRV_CARDS) snd_usx2y_card_used[usx2y->card_index] = 0; } static void snd_usx2y_disconnect(struct usb_interface *intf) { struct snd_card *card; struct usx2ydev *usx2y; struct list_head *p; card = usb_get_intfdata(intf); if (!card) return; usx2y = usx2y(card); usx2y->chip_status = USX2Y_STAT_CHIP_HUP; usx2y_unlinkseq(&usx2y->as04); usb_kill_urb(usx2y->in04_urb); snd_card_disconnect(card); /* release the midi resources */ list_for_each(p, &usx2y->midi_list) { snd_usbmidi_disconnect(p); } if (usx2y->us428ctls_sharedmem) wake_up(&usx2y->us428ctls_wait_queue_head); snd_card_free_when_closed(card); } static int snd_usx2y_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *device = interface_to_usbdev(intf); struct snd_card *card; int err; #ifdef USX2Y_NRPACKS_VARIABLE if (nrpacks < 0 || nrpacks > USX2Y_NRPACKS_MAX) return -EINVAL; #endif if (le16_to_cpu(device->descriptor.idVendor) != 0x1604 || (le16_to_cpu(device->descriptor.idProduct) != USB_ID_US122 && le16_to_cpu(device->descriptor.idProduct) != USB_ID_US224 && le16_to_cpu(device->descriptor.idProduct) != USB_ID_US428)) return -EINVAL; err = usx2y_create_card(device, intf, &card); if (err < 0) return err; err = usx2y_hwdep_new(card, device); if (err < 0) goto error; err = snd_card_register(card); if (err < 0) goto error; dev_set_drvdata(&intf->dev, card); return 0; error: snd_card_free(card); return err; } static struct usb_driver snd_usx2y_usb_driver = { .name = "snd-usb-usx2y", .probe = snd_usx2y_probe, .disconnect = snd_usx2y_disconnect, .id_table = snd_usx2y_usb_id_table, }; module_usb_driver(snd_usx2y_usb_driver);
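usx2y_create_card() above claims the first ALSA card slot that the enable module parameter allows and that is not already marked in snd_usx2y_card_used[], and the slot is released again in snd_usx2y_card_private_free(). The minimal user-space sketch below shows only that slot-selection pattern; the enable[] and used[] arrays and pick_free_slot() are hypothetical stand-ins for the driver's arrays, not driver code.

/*
 * Minimal sketch of the card-slot selection used by usx2y_create_card():
 * pick the first index that is enabled and not already claimed.
 */
#include <stdbool.h>
#include <stdio.h>

#define SNDRV_CARDS 8

static bool enable[SNDRV_CARDS] = { true, true, true };	/* first three slots enabled */
static int used[SNDRV_CARDS];

static int pick_free_slot(void)
{
	int dev;

	for (dev = 0; dev < SNDRV_CARDS; ++dev)
		if (enable[dev] && !used[dev])
			break;
	if (dev >= SNDRV_CARDS)
		return -1;		/* the driver returns -ENODEV here */
	used[dev] = 1;			/* claimed; cleared again when the card is freed */
	return dev;
}

int main(void)
{
	printf("first card gets slot %d\n", pick_free_slot());
	printf("second card gets slot %d\n", pick_free_slot());
	return 0;
}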
// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 2008 Red Hat, Inc. All rights reserved.
* Copyright 2008 Ian Kent <raven@themaw.net> */ #include <linux/module.h> #include <linux/miscdevice.h> #include <linux/compat.h> #include <linux/fdtable.h> #include <linux/magic.h> #include <linux/nospec.h> #include "autofs_i.h" /* * This module implements an interface for routing autofs ioctl control * commands via a miscellaneous device file. * * The alternate interface is needed because we need to be able open * an ioctl file descriptor on an autofs mount that may be covered by * another mount. This situation arises when starting automount(8) * or other user space daemon which uses direct mounts or offset * mounts (used for autofs lazy mount/umount of nested mount trees), * which have been left busy at service shutdown. */ typedef int (*ioctl_fn)(struct file *, struct autofs_sb_info *, struct autofs_dev_ioctl *); static int check_name(const char *name) { if (!strchr(name, '/')) return -EINVAL; return 0; } /* * Check a string doesn't overrun the chunk of * memory we copied from user land. */ static int invalid_str(char *str, size_t size) { if (memchr(str, 0, size)) return 0; return -EINVAL; } /* * Check that the user compiled against correct version of autofs * misc device code. * * As well as checking the version compatibility this always copies * the kernel interface version out. */ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param) { int err = 0; if ((param->ver_major != AUTOFS_DEV_IOCTL_VERSION_MAJOR) || (param->ver_minor > AUTOFS_DEV_IOCTL_VERSION_MINOR)) { pr_warn("ioctl control interface version mismatch: " "kernel(%u.%u), user(%u.%u), cmd(0x%08x)\n", AUTOFS_DEV_IOCTL_VERSION_MAJOR, AUTOFS_DEV_IOCTL_VERSION_MINOR, param->ver_major, param->ver_minor, cmd); err = -EINVAL; } /* Fill in the kernel version. */ param->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR; param->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; return err; } /* * Copy parameter control struct, including a possible path allocated * at the end of the struct. */ static struct autofs_dev_ioctl * copy_dev_ioctl(struct autofs_dev_ioctl __user *in) { struct autofs_dev_ioctl tmp, *res; if (copy_from_user(&tmp, in, AUTOFS_DEV_IOCTL_SIZE)) return ERR_PTR(-EFAULT); if (tmp.size < AUTOFS_DEV_IOCTL_SIZE) return ERR_PTR(-EINVAL); if (tmp.size > AUTOFS_DEV_IOCTL_SIZE + PATH_MAX) return ERR_PTR(-ENAMETOOLONG); res = memdup_user(in, tmp.size); if (!IS_ERR(res)) res->size = tmp.size; return res; } static inline void free_dev_ioctl(struct autofs_dev_ioctl *param) { kfree(param); } /* * Check sanity of parameter control fields and if a path is present * check that it is terminated and contains at least one "/". */ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) { unsigned int inr = _IOC_NR(cmd); int err; err = check_dev_ioctl_version(cmd, param); if (err) { pr_warn("invalid device control module version " "supplied for cmd(0x%08x)\n", cmd); goto out; } if (param->size > AUTOFS_DEV_IOCTL_SIZE) { err = invalid_str(param->path, param->size - AUTOFS_DEV_IOCTL_SIZE); if (err) { pr_warn( "path string terminator missing for cmd(0x%08x)\n", cmd); goto out; } /* Setting the per-dentry expire timeout requires a trailing * path component, ie. no '/', so invert the logic of the * check_name() return for AUTOFS_DEV_IOCTL_TIMEOUT_CMD. */ err = check_name(param->path); if (inr == AUTOFS_DEV_IOCTL_TIMEOUT_CMD) err = err ? 
0 : -EINVAL; if (err) { pr_warn("invalid path supplied for cmd(0x%08x)\n", cmd); goto out; } } else { if (inr == AUTOFS_DEV_IOCTL_OPENMOUNT_CMD || inr == AUTOFS_DEV_IOCTL_REQUESTER_CMD || inr == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) { err = -EINVAL; goto out; } } err = 0; out: return err; } /* Return autofs dev ioctl version */ static int autofs_dev_ioctl_version(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { /* This should have already been set. */ param->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR; param->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; return 0; } /* Return autofs module protocol version */ static int autofs_dev_ioctl_protover(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { param->protover.version = sbi->version; return 0; } /* Return autofs module protocol sub version */ static int autofs_dev_ioctl_protosubver(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { param->protosubver.sub_version = sbi->sub_version; return 0; } /* Find the topmost mount satisfying test() */ static int find_autofs_mount(const char *pathname, struct path *res, int test(const struct path *path, void *data), void *data) { struct path path; int err; err = kern_path(pathname, LOOKUP_MOUNTPOINT, &path); if (err) return err; err = -ENOENT; while (path.dentry == path.mnt->mnt_root) { if (path.dentry->d_sb->s_magic == AUTOFS_SUPER_MAGIC) { if (test(&path, data)) { path_get(&path); *res = path; err = 0; break; } } if (!follow_up(&path)) break; } path_put(&path); return err; } static int test_by_dev(const struct path *path, void *p) { return path->dentry->d_sb->s_dev == *(dev_t *)p; } static int test_by_type(const struct path *path, void *p) { struct autofs_info *ino = autofs_dentry_ino(path->dentry); return ino && ino->sbi->type & *(unsigned *)p; } /* * Open a file descriptor on the autofs mount point corresponding * to the given path and device number (aka. new_encode_dev(sb->s_dev)). */ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid) { int err, fd; fd = get_unused_fd_flags(O_CLOEXEC); if (likely(fd >= 0)) { struct file *filp; struct path path; err = find_autofs_mount(name, &path, test_by_dev, &devid); if (err) goto out; filp = dentry_open(&path, O_RDONLY, current_cred()); path_put(&path); if (IS_ERR(filp)) { err = PTR_ERR(filp); goto out; } fd_install(fd, filp); } return fd; out: put_unused_fd(fd); return err; } /* Open a file descriptor on an autofs mount point */ static int autofs_dev_ioctl_openmount(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { const char *path; dev_t devid; int err, fd; /* param->path has been checked in validate_dev_ioctl() */ if (!param->openmount.devid) return -EINVAL; param->ioctlfd = -1; path = param->path; devid = new_decode_dev(param->openmount.devid); err = 0; fd = autofs_dev_ioctl_open_mountpoint(path, devid); if (unlikely(fd < 0)) { err = fd; goto out; } param->ioctlfd = fd; out: return err; } /* Close file descriptor allocated above (user can also use close(2)). */ static int autofs_dev_ioctl_closemount(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { return close_fd(param->ioctlfd); } /* * Send "ready" status for an existing wait (either a mount or an expire * request). 
*/ static int autofs_dev_ioctl_ready(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { autofs_wqt_t token; token = (autofs_wqt_t) param->ready.token; return autofs_wait_release(sbi, token, 0); } /* * Send "fail" status for an existing wait (either a mount or an expire * request). */ static int autofs_dev_ioctl_fail(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { autofs_wqt_t token; int status; token = (autofs_wqt_t) param->fail.token; status = param->fail.status < 0 ? param->fail.status : -ENOENT; return autofs_wait_release(sbi, token, status); } /* * Set the pipe fd for kernel communication to the daemon. * * Normally this is set at mount using an option but if we * are reconnecting to a busy mount then we need to use this * to tell the autofs mount about the new kernel pipe fd. In * order to protect mounts against incorrectly setting the * pipefd we also require that the autofs mount be catatonic. * * This also sets the process group id used to identify the * controlling process (eg. the owning automount(8) daemon). */ static int autofs_dev_ioctl_setpipefd(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { int pipefd; int err = 0; struct pid *new_pid = NULL; if (param->setpipefd.pipefd == -1) return -EINVAL; pipefd = param->setpipefd.pipefd; mutex_lock(&sbi->wq_mutex); if (!(sbi->flags & AUTOFS_SBI_CATATONIC)) { mutex_unlock(&sbi->wq_mutex); return -EBUSY; } else { struct file *pipe; new_pid = get_task_pid(current, PIDTYPE_PGID); if (ns_of_pid(new_pid) != ns_of_pid(sbi->oz_pgrp)) { pr_warn("not allowed to change PID namespace\n"); err = -EINVAL; goto out; } pipe = fget(pipefd); if (!pipe) { err = -EBADF; goto out; } if (autofs_prepare_pipe(pipe) < 0) { err = -EPIPE; fput(pipe); goto out; } swap(sbi->oz_pgrp, new_pid); sbi->pipefd = pipefd; sbi->pipe = pipe; sbi->mnt_ns_id = to_ns_common(current->nsproxy->mnt_ns)->ns_id; sbi->flags &= ~AUTOFS_SBI_CATATONIC; } out: put_pid(new_pid); mutex_unlock(&sbi->wq_mutex); return err; } /* * Make the autofs mount point catatonic, no longer responsive to * mount requests. Also closes the kernel pipe file descriptor. */ static int autofs_dev_ioctl_catatonic(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { autofs_catatonic_mode(sbi); return 0; } /* * Set the autofs mount expire timeout. * * There are two places an expire timeout can be set, in the autofs * super block info. (this is all that's needed for direct and offset * mounts because there's a distinct mount corresponding to each of * these) and per-dentry within within the dentry info. If a per-dentry * timeout is set it will override the expire timeout set in the parent * autofs super block info. * * If setting the autofs super block expire timeout the autofs_dev_ioctl * size field will be equal to the autofs_dev_ioctl structure size. If * setting the per-dentry expire timeout the mount point name is passed * in the autofs_dev_ioctl path field and the size field updated to * reflect this. * * Setting the autofs mount expire timeout sets the timeout in the super * block info. struct. Setting the per-dentry timeout does a little more. * If the timeout is equal to -1 the per-dentry timeout (and flag) is * cleared which reverts to using the super block timeout, otherwise if * timeout is 0 the timeout is set to this value and the flag is left * set which disables expiration for the mount point, lastly the flag * and the timeout are set enabling the dentry to use this timeout. 
*/ static int autofs_dev_ioctl_timeout(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { unsigned long timeout = param->timeout.timeout; /* If setting the expire timeout for an individual indirect * mount point dentry the mount trailing component path is * placed in param->path and param->size adjusted to account * for it otherwise param->size it is set to the structure * size. */ if (param->size == AUTOFS_DEV_IOCTL_SIZE) { param->timeout.timeout = sbi->exp_timeout / HZ; sbi->exp_timeout = timeout * HZ; } else { struct dentry *base = fp->f_path.dentry; int path_len = param->size - AUTOFS_DEV_IOCTL_SIZE - 1; struct dentry *dentry; struct autofs_info *ino; if (!autofs_type_indirect(sbi->type)) return -EINVAL; /* An expire timeout greater than the superblock timeout * could be a problem at shutdown but the super block * timeout itself can change so all we can really do is * warn the user. */ if (timeout >= sbi->exp_timeout) pr_warn("per-mount expire timeout is greater than " "the parent autofs mount timeout which could " "prevent shutdown\n"); dentry = try_lookup_noperm(&QSTR_LEN(param->path, path_len), base); if (IS_ERR_OR_NULL(dentry)) return dentry ? PTR_ERR(dentry) : -ENOENT; ino = autofs_dentry_ino(dentry); if (!ino) { dput(dentry); return -ENOENT; } if (ino->exp_timeout && ino->flags & AUTOFS_INF_EXPIRE_SET) param->timeout.timeout = ino->exp_timeout / HZ; else param->timeout.timeout = sbi->exp_timeout / HZ; if (timeout == -1) { /* Revert to using the super block timeout */ ino->flags &= ~AUTOFS_INF_EXPIRE_SET; ino->exp_timeout = 0; } else { /* Set the dentry expire flag and timeout. * * If timeout is 0 it will prevent the expire * of this particular automount. */ ino->flags |= AUTOFS_INF_EXPIRE_SET; ino->exp_timeout = timeout * HZ; } dput(dentry); } return 0; } /* * Return the uid and gid of the last request for the mount * * When reconstructing an autofs mount tree with active mounts * we need to re-connect to mounts that may have used the original * process uid and gid (or string variations of them) for mount * lookups within the map entry. */ static int autofs_dev_ioctl_requester(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { struct autofs_info *ino; struct path path; dev_t devid; int err = -ENOENT; /* param->path has been checked in validate_dev_ioctl() */ devid = sbi->sb->s_dev; param->requester.uid = param->requester.gid = -1; err = find_autofs_mount(param->path, &path, test_by_dev, &devid); if (err) goto out; ino = autofs_dentry_ino(path.dentry); if (ino) { err = 0; autofs_expire_wait(&path, 0); spin_lock(&sbi->fs_lock); param->requester.uid = from_kuid_munged(current_user_ns(), ino->uid); param->requester.gid = from_kgid_munged(current_user_ns(), ino->gid); spin_unlock(&sbi->fs_lock); } path_put(&path); out: return err; } /* * Call repeatedly until it returns -EAGAIN, meaning there's nothing * more that can be done. */ static int autofs_dev_ioctl_expire(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { struct vfsmount *mnt; int how; how = param->expire.how; mnt = fp->f_path.mnt; return autofs_do_expire_multi(sbi->sb, mnt, sbi, how); } /* Check if autofs mount point is in use */ static int autofs_dev_ioctl_askumount(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { param->askumount.may_umount = 0; if (may_umount(fp->f_path.mnt)) param->askumount.may_umount = 1; return 0; } /* * Check if the given path is a mountpoint. 
* * If we are supplied with the file descriptor of an autofs * mount we're looking for a specific mount. In this case * the path is considered a mountpoint if it is itself a * mountpoint or contains a mount, such as a multi-mount * without a root mount. In this case we return 1 if the * path is a mount point and the super magic of the covering * mount if there is one or 0 if it isn't a mountpoint. * * If we aren't supplied with a file descriptor then we * lookup the path and check if it is the root of a mount. * If a type is given we are looking for a particular autofs * mount and if we don't find a match we return fail. If the * located path is the root of a mount we return 1 along with * the super magic of the mount or 0 otherwise. * * In both cases the device number (as returned by * new_encode_dev()) is also returned. */ static int autofs_dev_ioctl_ismountpoint(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { struct path path; const char *name; unsigned int type; unsigned int devid, magic; int err = -ENOENT; /* param->path has been checked in validate_dev_ioctl() */ name = param->path; type = param->ismountpoint.in.type; param->ismountpoint.out.devid = devid = 0; param->ismountpoint.out.magic = magic = 0; if (!fp || param->ioctlfd == -1) { if (autofs_type_any(type)) err = kern_path(name, LOOKUP_FOLLOW | LOOKUP_MOUNTPOINT, &path); else err = find_autofs_mount(name, &path, test_by_type, &type); if (err) goto out; devid = new_encode_dev(path.dentry->d_sb->s_dev); err = 0; if (path.mnt->mnt_root == path.dentry) { err = 1; magic = path.dentry->d_sb->s_magic; } } else { dev_t dev = sbi->sb->s_dev; err = find_autofs_mount(name, &path, test_by_dev, &dev); if (err) goto out; devid = new_encode_dev(dev); err = path_has_submounts(&path); if (follow_down_one(&path)) magic = path.dentry->d_sb->s_magic; } param->ismountpoint.out.devid = devid; param->ismountpoint.out.magic = magic; path_put(&path); out: return err; } /* * Our range of ioctl numbers isn't 0 based so we need to shift * the array index by _IOC_NR(AUTOFS_CTL_IOC_FIRST) for the table * lookup. */ #define cmd_idx(cmd) (cmd - _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST)) static ioctl_fn lookup_dev_ioctl(unsigned int cmd) { static const ioctl_fn _ioctls[] = { autofs_dev_ioctl_version, autofs_dev_ioctl_protover, autofs_dev_ioctl_protosubver, autofs_dev_ioctl_openmount, autofs_dev_ioctl_closemount, autofs_dev_ioctl_ready, autofs_dev_ioctl_fail, autofs_dev_ioctl_setpipefd, autofs_dev_ioctl_catatonic, autofs_dev_ioctl_timeout, autofs_dev_ioctl_requester, autofs_dev_ioctl_expire, autofs_dev_ioctl_askumount, autofs_dev_ioctl_ismountpoint, }; unsigned int idx = cmd_idx(cmd); if (idx >= ARRAY_SIZE(_ioctls)) return NULL; idx = array_index_nospec(idx, ARRAY_SIZE(_ioctls)); return _ioctls[idx]; } /* ioctl dispatcher */ static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __user *user) { struct autofs_dev_ioctl *param; struct file *fp; struct autofs_sb_info *sbi; unsigned int cmd_first, cmd; ioctl_fn fn = NULL; int err = 0; cmd_first = _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST); cmd = _IOC_NR(command); if (_IOC_TYPE(command) != _IOC_TYPE(AUTOFS_DEV_IOCTL_IOC_FIRST) || cmd - cmd_first > AUTOFS_DEV_IOCTL_IOC_COUNT) { return -ENOTTY; } /* Only root can use ioctls other than AUTOFS_DEV_IOCTL_VERSION_CMD * and AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD */ if (cmd != AUTOFS_DEV_IOCTL_VERSION_CMD && cmd != AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD && !capable(CAP_SYS_ADMIN)) return -EPERM; /* Copy the parameters into kernel space. 
*/ param = copy_dev_ioctl(user); if (IS_ERR(param)) return PTR_ERR(param); err = validate_dev_ioctl(command, param); if (err) goto out; fn = lookup_dev_ioctl(cmd); if (!fn) { pr_warn("unknown command 0x%08x\n", command); err = -ENOTTY; goto out; } fp = NULL; sbi = NULL; /* * For obvious reasons the openmount can't have a file * descriptor yet. We don't take a reference to the * file during close to allow for immediate release, * and the same for retrieving ioctl version. */ if (cmd != AUTOFS_DEV_IOCTL_VERSION_CMD && cmd != AUTOFS_DEV_IOCTL_OPENMOUNT_CMD && cmd != AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD) { struct super_block *sb; fp = fget(param->ioctlfd); if (!fp) { if (cmd == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) goto cont; err = -EBADF; goto out; } sb = file_inode(fp)->i_sb; if (sb->s_type != &autofs_fs_type) { err = -EINVAL; fput(fp); goto out; } sbi = autofs_sbi(sb); /* * Admin needs to be able to set the mount catatonic in * order to be able to perform the re-open. */ if (!autofs_oz_mode(sbi) && cmd != AUTOFS_DEV_IOCTL_CATATONIC_CMD) { err = -EACCES; fput(fp); goto out; } } cont: err = fn(fp, sbi, param); if (fp) fput(fp); if (err >= 0 && copy_to_user(user, param, AUTOFS_DEV_IOCTL_SIZE)) err = -EFAULT; out: free_dev_ioctl(param); return err; } static long autofs_dev_ioctl(struct file *file, unsigned int command, unsigned long u) { int err; err = _autofs_dev_ioctl(command, (struct autofs_dev_ioctl __user *) u); return (long) err; } #ifdef CONFIG_COMPAT static long autofs_dev_ioctl_compat(struct file *file, unsigned int command, unsigned long u) { return autofs_dev_ioctl(file, command, (unsigned long) compat_ptr(u)); } #else #define autofs_dev_ioctl_compat NULL #endif static const struct file_operations _dev_ioctl_fops = { .unlocked_ioctl = autofs_dev_ioctl, .compat_ioctl = autofs_dev_ioctl_compat, .owner = THIS_MODULE, .llseek = noop_llseek, }; static struct miscdevice _autofs_dev_ioctl_misc = { .minor = AUTOFS_MINOR, .name = AUTOFS_DEVICE_NAME, .fops = &_dev_ioctl_fops, .mode = 0644, }; MODULE_ALIAS_MISCDEV(AUTOFS_MINOR); MODULE_ALIAS("devname:autofs"); /* Register/deregister misc character device */ int __init autofs_dev_ioctl_init(void) { int r; r = misc_register(&_autofs_dev_ioctl_misc); if (r) { pr_err("misc_register failed for control device\n"); return r; } return 0; } void autofs_dev_ioctl_exit(void) { misc_deregister(&_autofs_dev_ioctl_misc); }
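The dispatcher above routes commands arriving on the autofs misc device to the table of ioctl_fn handlers, and AUTOFS_DEV_IOCTL_VERSION_CMD is one of the two commands usable without CAP_SYS_ADMIN. The user-space sketch below shows the simplest round trip through that path; it assumes the uapi header <linux/auto_dev-ioctl.h> and the /dev/autofs node created via misc_register() above, and it is an illustration rather than part of the driver.

/*
 * Hedged user-space sketch: query the autofs dev-ioctl interface version.
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/auto_dev-ioctl.h>

int main(void)
{
	struct autofs_dev_ioctl param;
	int fd = open("/dev/autofs", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/autofs");
		return 1;
	}

	memset(&param, 0, sizeof(param));
	param.ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
	param.ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
	param.size = sizeof(param);	/* no path appended, so size == struct size */
	param.ioctlfd = -1;		/* VERSION does not need an ioctl fd */

	if (ioctl(fd, AUTOFS_DEV_IOCTL_VERSION, &param) < 0)
		perror("AUTOFS_DEV_IOCTL_VERSION");
	else
		printf("kernel autofs dev ioctl version %u.%u\n",
		       param.ver_major, param.ver_minor);

	close(fd);
	return 0;
}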
// SPDX-License-Identifier: GPL-2.0-or-later /* * USB Synaptics device driver * * Copyright (c) 2002 Rob Miller (rob@inpharmatica . co . uk) * Copyright (c) 2003 Ron Lee (ron@debian.org) * cPad driver for kernel 2.4 * * Copyright (c) 2004 Jan Steinhoff (cpad@jan-steinhoff . de) * Copyright (c) 2004 Ron Lee (ron@debian.org) * rewritten for kernel 2.6 * * cPad display character device part is not included. It can be found at * http://jan-steinhoff.de/linux/synaptics-usb.html * * Bases on: usb_skeleton.c v2.2 by Greg Kroah-Hartman * drivers/hid/usbhid/usbmouse.c by Vojtech Pavlik * drivers/input/mouse/synaptics.c by Peter Osterlund * * Trademarks are the property of their respective owners. */ /* * There are three different types of Synaptics USB devices: Touchpads, * touchsticks (or trackpoints), and touchscreens. Touchpads are well supported * by this driver, touchstick support has not been tested much yet, and * touchscreens have not been tested at all. * * Up to three alternate settings are possible: * setting 0: one int endpoint for relative movement (used by usbhid.ko) * setting 1: one int endpoint for absolute finger position * setting 2 (cPad only): one int endpoint for absolute finger position and * two bulk endpoints for the display (in/out) * This driver uses setting 1.
*/ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/usb.h> #include <linux/input.h> #include <linux/usb/input.h> #define USB_VENDOR_ID_SYNAPTICS 0x06cb #define USB_DEVICE_ID_SYNAPTICS_TP 0x0001 /* Synaptics USB TouchPad */ #define USB_DEVICE_ID_SYNAPTICS_INT_TP 0x0002 /* Integrated USB TouchPad */ #define USB_DEVICE_ID_SYNAPTICS_CPAD 0x0003 /* Synaptics cPad */ #define USB_DEVICE_ID_SYNAPTICS_TS 0x0006 /* Synaptics TouchScreen */ #define USB_DEVICE_ID_SYNAPTICS_STICK 0x0007 /* Synaptics USB Styk */ #define USB_DEVICE_ID_SYNAPTICS_WP 0x0008 /* Synaptics USB WheelPad */ #define USB_DEVICE_ID_SYNAPTICS_COMP_TP 0x0009 /* Composite USB TouchPad */ #define USB_DEVICE_ID_SYNAPTICS_WTP 0x0010 /* Wireless TouchPad */ #define USB_DEVICE_ID_SYNAPTICS_DPAD 0x0013 /* DisplayPad */ #define SYNUSB_TOUCHPAD (1 << 0) #define SYNUSB_STICK (1 << 1) #define SYNUSB_TOUCHSCREEN (1 << 2) #define SYNUSB_AUXDISPLAY (1 << 3) /* For cPad */ #define SYNUSB_COMBO (1 << 4) /* Composite device (TP + stick) */ #define SYNUSB_IO_ALWAYS (1 << 5) #define USB_DEVICE_SYNAPTICS(prod, kind) \ USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, \ USB_DEVICE_ID_SYNAPTICS_##prod), \ .driver_info = (kind), #define SYNUSB_RECV_SIZE 8 #define XMIN_NOMINAL 1472 #define XMAX_NOMINAL 5472 #define YMIN_NOMINAL 1408 #define YMAX_NOMINAL 4448 struct synusb { struct usb_device *udev; struct usb_interface *intf; struct urb *urb; unsigned char *data; /* serialize access to open/suspend */ struct mutex pm_mutex; bool is_open; /* input device related data structures */ struct input_dev *input; char name[128]; char phys[64]; /* characteristics of the device */ unsigned long flags; }; static void synusb_report_buttons(struct synusb *synusb) { struct input_dev *input_dev = synusb->input; input_report_key(input_dev, BTN_LEFT, synusb->data[1] & 0x04); input_report_key(input_dev, BTN_RIGHT, synusb->data[1] & 0x01); input_report_key(input_dev, BTN_MIDDLE, synusb->data[1] & 0x02); } static void synusb_report_stick(struct synusb *synusb) { struct input_dev *input_dev = synusb->input; int x, y; unsigned int pressure; pressure = synusb->data[6]; x = (s16)(be16_to_cpup((__be16 *)&synusb->data[2]) << 3) >> 7; y = (s16)(be16_to_cpup((__be16 *)&synusb->data[4]) << 3) >> 7; if (pressure > 0) { input_report_rel(input_dev, REL_X, x); input_report_rel(input_dev, REL_Y, -y); } input_report_abs(input_dev, ABS_PRESSURE, pressure); synusb_report_buttons(synusb); input_sync(input_dev); } static void synusb_report_touchpad(struct synusb *synusb) { struct input_dev *input_dev = synusb->input; unsigned int num_fingers, tool_width; unsigned int x, y; unsigned int pressure, w; pressure = synusb->data[6]; x = be16_to_cpup((__be16 *)&synusb->data[2]); y = be16_to_cpup((__be16 *)&synusb->data[4]); w = synusb->data[0] & 0x0f; if (pressure > 0) { num_fingers = 1; tool_width = 5; switch (w) { case 0 ... 1: num_fingers = 2 + w; break; case 2: /* pen, pretend its a finger */ break; case 4 ... 
15: tool_width = w; break; } } else { num_fingers = 0; tool_width = 0; } /* * Post events * BTN_TOUCH has to be first as mousedev relies on it when doing * absolute -> relative conversion */ if (pressure > 30) input_report_key(input_dev, BTN_TOUCH, 1); if (pressure < 25) input_report_key(input_dev, BTN_TOUCH, 0); if (num_fingers > 0) { input_report_abs(input_dev, ABS_X, x); input_report_abs(input_dev, ABS_Y, YMAX_NOMINAL + YMIN_NOMINAL - y); } input_report_abs(input_dev, ABS_PRESSURE, pressure); input_report_abs(input_dev, ABS_TOOL_WIDTH, tool_width); input_report_key(input_dev, BTN_TOOL_FINGER, num_fingers == 1); input_report_key(input_dev, BTN_TOOL_DOUBLETAP, num_fingers == 2); input_report_key(input_dev, BTN_TOOL_TRIPLETAP, num_fingers == 3); synusb_report_buttons(synusb); if (synusb->flags & SYNUSB_AUXDISPLAY) input_report_key(input_dev, BTN_MIDDLE, synusb->data[1] & 0x08); input_sync(input_dev); } static void synusb_irq(struct urb *urb) { struct synusb *synusb = urb->context; int error; /* Check our status in case we need to bail out early. */ switch (urb->status) { case 0: usb_mark_last_busy(synusb->udev); break; /* Device went away so don't keep trying to read from it. */ case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: return; default: goto resubmit; break; } if (synusb->flags & SYNUSB_STICK) synusb_report_stick(synusb); else synusb_report_touchpad(synusb); resubmit: error = usb_submit_urb(urb, GFP_ATOMIC); if (error && error != -EPERM) dev_err(&synusb->intf->dev, "%s - usb_submit_urb failed with result: %d", __func__, error); } static struct usb_endpoint_descriptor * synusb_get_in_endpoint(struct usb_host_interface *iface) { struct usb_endpoint_descriptor *endpoint; int i; for (i = 0; i < iface->desc.bNumEndpoints; ++i) { endpoint = &iface->endpoint[i].desc; if (usb_endpoint_is_int_in(endpoint)) { /* we found our interrupt in endpoint */ return endpoint; } } return NULL; } static int synusb_open(struct input_dev *dev) { struct synusb *synusb = input_get_drvdata(dev); int retval; retval = usb_autopm_get_interface(synusb->intf); if (retval) { dev_err(&synusb->intf->dev, "%s - usb_autopm_get_interface failed, error: %d\n", __func__, retval); return retval; } mutex_lock(&synusb->pm_mutex); retval = usb_submit_urb(synusb->urb, GFP_KERNEL); if (retval) { dev_err(&synusb->intf->dev, "%s - usb_submit_urb failed, error: %d\n", __func__, retval); retval = -EIO; goto out; } synusb->intf->needs_remote_wakeup = 1; synusb->is_open = true; out: mutex_unlock(&synusb->pm_mutex); usb_autopm_put_interface(synusb->intf); return retval; } static void synusb_close(struct input_dev *dev) { struct synusb *synusb = input_get_drvdata(dev); int autopm_error; autopm_error = usb_autopm_get_interface(synusb->intf); mutex_lock(&synusb->pm_mutex); usb_kill_urb(synusb->urb); synusb->intf->needs_remote_wakeup = 0; synusb->is_open = false; mutex_unlock(&synusb->pm_mutex); if (!autopm_error) usb_autopm_put_interface(synusb->intf); } static int synusb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_endpoint_descriptor *ep; struct synusb *synusb; struct input_dev *input_dev; unsigned int intf_num = intf->cur_altsetting->desc.bInterfaceNumber; unsigned int altsetting = min(intf->num_altsetting, 1U); int error; error = usb_set_interface(udev, intf_num, altsetting); if (error) { dev_err(&udev->dev, "Can not set alternate setting to %i, error: %i", altsetting, error); return error; } ep = synusb_get_in_endpoint(intf->cur_altsetting); if 
(!ep) return -ENODEV; synusb = kzalloc(sizeof(*synusb), GFP_KERNEL); input_dev = input_allocate_device(); if (!synusb || !input_dev) { error = -ENOMEM; goto err_free_mem; } synusb->udev = udev; synusb->intf = intf; synusb->input = input_dev; mutex_init(&synusb->pm_mutex); synusb->flags = id->driver_info; if (synusb->flags & SYNUSB_COMBO) { /* * This is a combo device, we need to set proper * capability, depending on the interface. */ synusb->flags |= intf_num == 1 ? SYNUSB_STICK : SYNUSB_TOUCHPAD; } synusb->urb = usb_alloc_urb(0, GFP_KERNEL); if (!synusb->urb) { error = -ENOMEM; goto err_free_mem; } synusb->data = usb_alloc_coherent(udev, SYNUSB_RECV_SIZE, GFP_KERNEL, &synusb->urb->transfer_dma); if (!synusb->data) { error = -ENOMEM; goto err_free_urb; } usb_fill_int_urb(synusb->urb, udev, usb_rcvintpipe(udev, ep->bEndpointAddress), synusb->data, SYNUSB_RECV_SIZE, synusb_irq, synusb, ep->bInterval); synusb->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; if (udev->manufacturer) strscpy(synusb->name, udev->manufacturer, sizeof(synusb->name)); if (udev->product) { if (udev->manufacturer) strlcat(synusb->name, " ", sizeof(synusb->name)); strlcat(synusb->name, udev->product, sizeof(synusb->name)); } if (!strlen(synusb->name)) snprintf(synusb->name, sizeof(synusb->name), "USB Synaptics Device %04x:%04x", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct)); if (synusb->flags & SYNUSB_STICK) strlcat(synusb->name, " (Stick)", sizeof(synusb->name)); usb_make_path(udev, synusb->phys, sizeof(synusb->phys)); strlcat(synusb->phys, "/input0", sizeof(synusb->phys)); input_dev->name = synusb->name; input_dev->phys = synusb->phys; usb_to_input_id(udev, &input_dev->id); input_dev->dev.parent = &synusb->intf->dev; if (!(synusb->flags & SYNUSB_IO_ALWAYS)) { input_dev->open = synusb_open; input_dev->close = synusb_close; } input_set_drvdata(input_dev, synusb); __set_bit(EV_ABS, input_dev->evbit); __set_bit(EV_KEY, input_dev->evbit); if (synusb->flags & SYNUSB_STICK) { __set_bit(EV_REL, input_dev->evbit); __set_bit(REL_X, input_dev->relbit); __set_bit(REL_Y, input_dev->relbit); __set_bit(INPUT_PROP_POINTING_STICK, input_dev->propbit); input_set_abs_params(input_dev, ABS_PRESSURE, 0, 127, 0, 0); } else { input_set_abs_params(input_dev, ABS_X, XMIN_NOMINAL, XMAX_NOMINAL, 0, 0); input_set_abs_params(input_dev, ABS_Y, YMIN_NOMINAL, YMAX_NOMINAL, 0, 0); input_set_abs_params(input_dev, ABS_PRESSURE, 0, 255, 0, 0); input_set_abs_params(input_dev, ABS_TOOL_WIDTH, 0, 15, 0, 0); __set_bit(BTN_TOUCH, input_dev->keybit); __set_bit(BTN_TOOL_FINGER, input_dev->keybit); __set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit); __set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit); } if (synusb->flags & SYNUSB_TOUCHSCREEN) __set_bit(INPUT_PROP_DIRECT, input_dev->propbit); else __set_bit(INPUT_PROP_POINTER, input_dev->propbit); __set_bit(BTN_LEFT, input_dev->keybit); __set_bit(BTN_RIGHT, input_dev->keybit); __set_bit(BTN_MIDDLE, input_dev->keybit); usb_set_intfdata(intf, synusb); if (synusb->flags & SYNUSB_IO_ALWAYS) { error = synusb_open(input_dev); if (error) goto err_free_dma; } error = input_register_device(input_dev); if (error) { dev_err(&udev->dev, "Failed to register input device, error %d\n", error); goto err_stop_io; } return 0; err_stop_io: if (synusb->flags & SYNUSB_IO_ALWAYS) synusb_close(synusb->input); err_free_dma: usb_free_coherent(udev, SYNUSB_RECV_SIZE, synusb->data, synusb->urb->transfer_dma); err_free_urb: usb_free_urb(synusb->urb); err_free_mem: input_free_device(input_dev); kfree(synusb); 
usb_set_intfdata(intf, NULL); return error; } static void synusb_disconnect(struct usb_interface *intf) { struct synusb *synusb = usb_get_intfdata(intf); struct usb_device *udev = interface_to_usbdev(intf); if (synusb->flags & SYNUSB_IO_ALWAYS) synusb_close(synusb->input); input_unregister_device(synusb->input); usb_free_coherent(udev, SYNUSB_RECV_SIZE, synusb->data, synusb->urb->transfer_dma); usb_free_urb(synusb->urb); kfree(synusb); usb_set_intfdata(intf, NULL); } static int synusb_suspend(struct usb_interface *intf, pm_message_t message) { struct synusb *synusb = usb_get_intfdata(intf); mutex_lock(&synusb->pm_mutex); usb_kill_urb(synusb->urb); mutex_unlock(&synusb->pm_mutex); return 0; } static int synusb_resume(struct usb_interface *intf) { struct synusb *synusb = usb_get_intfdata(intf); int retval = 0; mutex_lock(&synusb->pm_mutex); if ((synusb->is_open || (synusb->flags & SYNUSB_IO_ALWAYS)) && usb_submit_urb(synusb->urb, GFP_NOIO) < 0) { retval = -EIO; } mutex_unlock(&synusb->pm_mutex); return retval; } static int synusb_pre_reset(struct usb_interface *intf) { struct synusb *synusb = usb_get_intfdata(intf); mutex_lock(&synusb->pm_mutex); usb_kill_urb(synusb->urb); return 0; } static int synusb_post_reset(struct usb_interface *intf) { struct synusb *synusb = usb_get_intfdata(intf); int retval = 0; if ((synusb->is_open || (synusb->flags & SYNUSB_IO_ALWAYS)) && usb_submit_urb(synusb->urb, GFP_NOIO) < 0) { retval = -EIO; } mutex_unlock(&synusb->pm_mutex); return retval; } static int synusb_reset_resume(struct usb_interface *intf) { return synusb_resume(intf); } static const struct usb_device_id synusb_idtable[] = { { USB_DEVICE_SYNAPTICS(TP, SYNUSB_TOUCHPAD) }, { USB_DEVICE_SYNAPTICS(INT_TP, SYNUSB_TOUCHPAD) }, { USB_DEVICE_SYNAPTICS(CPAD, SYNUSB_TOUCHPAD | SYNUSB_AUXDISPLAY | SYNUSB_IO_ALWAYS) }, { USB_DEVICE_SYNAPTICS(TS, SYNUSB_TOUCHSCREEN) }, { USB_DEVICE_SYNAPTICS(STICK, SYNUSB_STICK) }, { USB_DEVICE_SYNAPTICS(WP, SYNUSB_TOUCHPAD) }, { USB_DEVICE_SYNAPTICS(COMP_TP, SYNUSB_COMBO) }, { USB_DEVICE_SYNAPTICS(WTP, SYNUSB_TOUCHPAD) }, { USB_DEVICE_SYNAPTICS(DPAD, SYNUSB_TOUCHPAD) }, { } }; MODULE_DEVICE_TABLE(usb, synusb_idtable); static struct usb_driver synusb_driver = { .name = "synaptics_usb", .probe = synusb_probe, .disconnect = synusb_disconnect, .id_table = synusb_idtable, .suspend = synusb_suspend, .resume = synusb_resume, .pre_reset = synusb_pre_reset, .post_reset = synusb_post_reset, .reset_resume = synusb_reset_resume, .supports_autosuspend = 1, }; module_usb_driver(synusb_driver); MODULE_AUTHOR("Rob Miller <rob@inpharmatica.co.uk>, " "Ron Lee <ron@debian.org>, " "Jan Steinhoff <cpad@jan-steinhoff.de>"); MODULE_DESCRIPTION("Synaptics USB device driver"); MODULE_LICENSE("GPL");
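synusb_report_stick() above recovers signed relative coordinates from the big-endian samples by shifting the raw value left until its sign bit sits in bit 15 of an s16 and then arithmetic-shifting right, which sign-extends while dropping the low-order bits. The stand-alone illustration below shows only that idiom; it assumes the input is already in CPU byte order and that right-shifting a negative value is an arithmetic shift, as the driver relies on.

/*
 * Illustration of the sign-extension idiom in synusb_report_stick():
 * (s16)(raw << 3) >> 7, with raw already converted from big-endian.
 */
#include <stdint.h>
#include <stdio.h>

static int extract_signed(uint16_t raw)
{
	/* Left shift puts the field's sign bit at bit 15 of the s16;
	 * the arithmetic right shift then sign-extends and scales down.
	 */
	return (int16_t)(raw << 3) >> 7;
}

int main(void)
{
	printf("0x0FFF -> %d\n", extract_signed(0x0FFF));	/* positive sample */
	printf("0x1FFF -> %d\n", extract_signed(0x1FFF));	/* negative once bit 12 acts as sign */
	return 0;
}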
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PROJID_H #define _LINUX_PROJID_H /* * A set of types for the internal kernel types representing project ids. * * The types defined in this header allow distinguishing which project ids in * the kernel are values used by userspace and which project id values are * the internal kernel values. With the addition of user namespaces the values * can be different. Using the type system makes it possible for the compiler * to detect when we overlook these differences. * */ #include <linux/types.h> struct user_namespace; extern struct user_namespace init_user_ns; typedef __kernel_uid32_t projid_t; typedef struct { projid_t val; } kprojid_t; static inline projid_t __kprojid_val(kprojid_t projid) { return projid.val; } #define KPROJIDT_INIT(value) (kprojid_t){ value } #define INVALID_PROJID KPROJIDT_INIT(-1) #define OVERFLOW_PROJID 65534 static inline bool projid_eq(kprojid_t left, kprojid_t right) { return __kprojid_val(left) == __kprojid_val(right); } static inline bool projid_lt(kprojid_t left, kprojid_t right) { return __kprojid_val(left) < __kprojid_val(right); } static inline bool projid_valid(kprojid_t projid) { return !projid_eq(projid, INVALID_PROJID); } #ifdef CONFIG_USER_NS extern kprojid_t make_kprojid(struct user_namespace *from, projid_t projid); extern projid_t from_kprojid(struct user_namespace *to, kprojid_t projid); extern projid_t from_kprojid_munged(struct user_namespace *to, kprojid_t projid); static inline bool kprojid_has_mapping(struct user_namespace *ns, kprojid_t projid) { return from_kprojid(ns, projid) != (projid_t)-1; } #else static inline kprojid_t make_kprojid(struct user_namespace *from, projid_t projid) { return KPROJIDT_INIT(projid); } static inline projid_t from_kprojid(struct user_namespace *to, kprojid_t kprojid) { return __kprojid_val(kprojid); } static inline projid_t from_kprojid_munged(struct user_namespace *to, kprojid_t kprojid) { projid_t projid = from_kprojid(to, kprojid); if (projid == (projid_t)-1) projid = OVERFLOW_PROJID; return projid; } static inline bool kprojid_has_mapping(struct user_namespace *ns, kprojid_t projid) { return true; } #endif /* CONFIG_USER_NS */ #endif /* _LINUX_PROJID_H */
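The header above exists so raw user-visible project ids and kernel-internal ids cannot be mixed by accident: the kernel value is wrapped in a one-member struct, (projid_t)-1 is reserved as the invalid sentinel, and 65534 is returned when a value has no mapping. The stand-alone model below mirrors only the !CONFIG_USER_NS identity stubs, not the real namespace-aware mapping, and is an illustration rather than kernel code.

/*
 * Minimal model of the typed-id pattern from projid.h: a distinct struct
 * type prevents mixing "kernel" and "userspace" id values, and one value
 * is reserved as the invalid/overflow sentinel.
 */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned int projid_t;
typedef struct { projid_t val; } kprojid_t;	/* mirrors kprojid_t above */

#define KPROJIDT_INIT(v)  ((kprojid_t){ (v) })
#define INVALID_PROJID    KPROJIDT_INIT((projid_t)-1)
#define OVERFLOW_PROJID   65534u

static bool projid_valid(kprojid_t id)
{
	return id.val != (projid_t)-1;
}

/* Identity mapping, as in the !CONFIG_USER_NS stubs above. */
static projid_t from_kprojid_munged(kprojid_t id)
{
	return projid_valid(id) ? id.val : OVERFLOW_PROJID;
}

int main(void)
{
	kprojid_t ok = KPROJIDT_INIT(1000);
	kprojid_t bad = INVALID_PROJID;

	printf("mapped:   %u\n", from_kprojid_munged(ok));
	printf("unmapped: %u\n", from_kprojid_munged(bad));
	return 0;
}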
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2003-2005 Devicescape Software, Inc.
static const char * const sta_flag_names[] = {
#define FLAG(F) [WLAN_STA_##F] = #F
	FLAG(AUTH),
	FLAG(ASSOC),
	FLAG(PS_STA),
	FLAG(AUTHORIZED),
	FLAG(SHORT_PREAMBLE),
	FLAG(WDS),
	FLAG(CLEAR_PS_FILT),
	FLAG(MFP),
	FLAG(BLOCK_BA),
	FLAG(PS_DRIVER),
	FLAG(PSPOLL),
	FLAG(TDLS_PEER),
	FLAG(TDLS_PEER_AUTH),
	FLAG(TDLS_INITIATOR),
	FLAG(TDLS_CHAN_SWITCH),
	FLAG(TDLS_OFF_CHANNEL),
	FLAG(TDLS_WIDER_BW),
	FLAG(UAPSD),
	FLAG(SP),
	FLAG(4ADDR_EVENT),
	FLAG(INSERTED),
	FLAG(RATE_CONTROL),
	FLAG(TOFFSET_KNOWN),
	FLAG(MPSP_OWNER),
	FLAG(MPSP_RECIPIENT),
	FLAG(PS_DELIVER),
	FLAG(USES_ENCRYPTION),
	FLAG(DECAP_OFFLOAD),
#undef FLAG
};

static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
			      size_t count, loff_t *ppos)
{
	char buf[16 * NUM_WLAN_STA_FLAGS], *pos = buf;
	char *end = buf + sizeof(buf) - 1;
	struct sta_info *sta = file->private_data;
	unsigned int flg;

	BUILD_BUG_ON(ARRAY_SIZE(sta_flag_names) != NUM_WLAN_STA_FLAGS);

	for (flg = 0; flg < NUM_WLAN_STA_FLAGS; flg++) {
		if (test_sta_flag(sta, flg))
			pos += scnprintf(pos, end - pos, "%s\n",
					 sta_flag_names[flg]);
	}

	return simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf));
}
STA_OPS(flags);

static ssize_t sta_num_ps_buf_frames_read(struct file *file,
					  char __user *userbuf,
					  size_t count, loff_t *ppos)
{
	struct sta_info *sta = file->private_data;
	char buf[17 * IEEE80211_NUM_ACS], *p = buf;
	int ac;

	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
		p += scnprintf(p, sizeof(buf) + buf - p, "AC%d: %d\n", ac,
			       skb_queue_len(&sta->ps_tx_buf[ac]) +
			       skb_queue_len(&sta->tx_filtered[ac]));
	return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
}
STA_OPS(num_ps_buf_frames);

static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf,
				      size_t count, loff_t *ppos)
{
	char buf[15 * IEEE80211_NUM_TIDS], *p = buf;
	int i;
	struct sta_info *sta = file->private_data;

	for (i = 0; i < IEEE80211_NUM_TIDS; i++)
		p += scnprintf(p, sizeof(buf) + buf - p, "%x ",
			       le16_to_cpu(sta->last_seq_ctrl[i]));
	p += scnprintf(p, sizeof(buf) + buf - p, "\n");

	return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
}
STA_OPS(last_seq_ctrl);

#define AQM_TXQ_ENTRY_LEN 130

static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
			    size_t count, loff_t *ppos)
{
	struct sta_info *sta = file->private_data;
	struct ieee80211_local *local = sta->local;
	size_t bufsz = AQM_TXQ_ENTRY_LEN * (IEEE80211_NUM_TIDS + 2);
	char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
	struct txq_info *txqi;
	ssize_t rv;
	int i;

	if (!buf)
		return -ENOMEM;

	spin_lock_bh(&local->fq.lock);

	p += scnprintf(p, bufsz + buf - p,
		       "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets flags\n");

	for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
		if (!sta->sta.txq[i])
			continue;
		txqi = to_txq_info(sta->sta.txq[i]);
		p += scnprintf(p, bufsz + buf - p,
			       "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s%s)\n",
			       txqi->txq.tid,
			       txqi->txq.ac,
			       txqi->tin.backlog_bytes,
			       txqi->tin.backlog_packets,
			       txqi->tin.flows,
			       txqi->cstats.drop_count,
			       txqi->cstats.ecn_mark,
			       txqi->tin.overlimit,
			       txqi->tin.collisions,
			       txqi->tin.tx_bytes,
			       txqi->tin.tx_packets,
			       txqi->flags,
			       test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ? "STOP" : "RUN",
			       test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags) ? " AMPDU" : "",
			       test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags) ? " NO-AMSDU" : "",
			       test_bit(IEEE80211_TXQ_DIRTY, &txqi->flags) ? " DIRTY" : "");
	}

	spin_unlock_bh(&local->fq.lock);

	rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
	kfree(buf);
	return rv;
}
STA_OPS(aqm);
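/*
 * Reading the "aqm" file above emits one row per allocated TXQ of the
 * station, sampling the per-tin fq_codel counters while local->fq.lock
 * is held.
 */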
static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
				size_t count, loff_t *ppos)
{
	struct sta_info *sta = file->private_data;
	struct ieee80211_local *local = sta->sdata->local;
	size_t bufsz = 400;
	char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
	u64 rx_airtime = 0, tx_airtime = 0;
	s32 deficit[IEEE80211_NUM_ACS];
	ssize_t rv;
	int ac;

	if (!buf)
		return -ENOMEM;

	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
		spin_lock_bh(&local->active_txq_lock[ac]);
		rx_airtime += sta->airtime[ac].rx_airtime;
		tx_airtime += sta->airtime[ac].tx_airtime;
		deficit[ac] = sta->airtime[ac].deficit;
		spin_unlock_bh(&local->active_txq_lock[ac]);
	}

	p += scnprintf(p, bufsz + buf - p,
		       "RX: %llu us\nTX: %llu us\nWeight: %u\n"
		       "Deficit: VO: %d us VI: %d us BE: %d us BK: %d us\n",
		       rx_airtime, tx_airtime, sta->airtime_weight,
		       deficit[0], deficit[1], deficit[2], deficit[3]);

	rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
	kfree(buf);
	return rv;
}

static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
				 size_t count, loff_t *ppos)
{
	struct sta_info *sta = file->private_data;
	struct ieee80211_local *local = sta->sdata->local;
	int ac;

	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
		spin_lock_bh(&local->active_txq_lock[ac]);
		sta->airtime[ac].rx_airtime = 0;
		sta->airtime[ac].tx_airtime = 0;
		sta->airtime[ac].deficit = sta->airtime_weight;
		spin_unlock_bh(&local->active_txq_lock[ac]);
	}

	return count;
}
STA_OPS_RW(airtime);
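/*
 * Writing anything to the "airtime" file resets the accumulated RX/TX
 * airtime of every AC and restores each per-AC deficit to the station's
 * airtime_weight; the written data itself is ignored.
 */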
static ssize_t sta_aql_read(struct file *file, char __user *userbuf,
			    size_t count, loff_t *ppos)
{
	struct sta_info *sta = file->private_data;
	struct ieee80211_local *local = sta->sdata->local;
	size_t bufsz = 400;
	char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
	u32 q_depth[IEEE80211_NUM_ACS];
	u32 q_limit_l[IEEE80211_NUM_ACS], q_limit_h[IEEE80211_NUM_ACS];
	ssize_t rv;
	int ac;

	if (!buf)
		return -ENOMEM;

	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
		spin_lock_bh(&local->active_txq_lock[ac]);
		q_limit_l[ac] = sta->airtime[ac].aql_limit_low;
		q_limit_h[ac] = sta->airtime[ac].aql_limit_high;
		spin_unlock_bh(&local->active_txq_lock[ac]);
		q_depth[ac] = atomic_read(&sta->airtime[ac].aql_tx_pending);
	}

	p += scnprintf(p, bufsz + buf - p,
		       "Q depth: VO: %u us VI: %u us BE: %u us BK: %u us\n"
		       "Q limit[low/high]: VO: %u/%u VI: %u/%u BE: %u/%u BK: %u/%u\n",
		       q_depth[0], q_depth[1], q_depth[2], q_depth[3],
		       q_limit_l[0], q_limit_h[0], q_limit_l[1], q_limit_h[1],
		       q_limit_l[2], q_limit_h[2], q_limit_l[3], q_limit_h[3]);

	rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
	kfree(buf);
	return rv;
}

static ssize_t sta_aql_write(struct file *file, const char __user *userbuf,
			     size_t count, loff_t *ppos)
{
	struct sta_info *sta = file->private_data;
	u32 ac, q_limit_l, q_limit_h;
	char _buf[100] = {}, *buf = _buf;

	if (count > sizeof(_buf))
		return -EINVAL;

	if (copy_from_user(buf, userbuf, count))
		return -EFAULT;

	buf[sizeof(_buf) - 1] = '\0';
	if (sscanf(buf, "limit %u %u %u", &ac, &q_limit_l, &q_limit_h) != 3)
		return -EINVAL;

	if (ac >= IEEE80211_NUM_ACS)
		return -EINVAL;

	sta->airtime[ac].aql_limit_low = q_limit_l;
	sta->airtime[ac].aql_limit_high = q_limit_h;

	return count;
}
STA_OPS_RW(aql);
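/*
 * The "aql" file accepts writes of the form "limit <ac> <low> <high>",
 * where <ac> is 0-3 (VO/VI/BE/BK), and updates that AC's AQL pending
 * airtime limits for this station.
 */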
static ssize_t sta_agg_status_do_read(struct wiphy *wiphy, struct file *file,
				      char *buf, size_t bufsz, void *data)
{
	struct sta_info *sta = data;
	char *p = buf;
	int i;
	struct tid_ampdu_rx *tid_rx;
	struct tid_ampdu_tx *tid_tx;

	p += scnprintf(p, bufsz + buf - p, "next dialog_token: %#02x\n",
		       sta->ampdu_mlme.dialog_token_allocator + 1);
	p += scnprintf(p, bufsz + buf - p,
		       "TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tpending\n");

	for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
		bool tid_rx_valid;

		tid_rx = wiphy_dereference(wiphy, sta->ampdu_mlme.tid_rx[i]);
		tid_tx = wiphy_dereference(wiphy, sta->ampdu_mlme.tid_tx[i]);
		tid_rx_valid = test_bit(i, sta->ampdu_mlme.agg_session_valid);

		p += scnprintf(p, bufsz + buf - p, "%02d", i);
		p += scnprintf(p, bufsz + buf - p, "\t\t%x", tid_rx_valid);
		p += scnprintf(p, bufsz + buf - p, "\t%#.2x",
			       tid_rx_valid ?
					sta->ampdu_mlme.tid_rx_token[i] : 0);
		p += scnprintf(p, bufsz + buf - p, "\t%#.3x",
			       tid_rx ? tid_rx->ssn : 0);
		p += scnprintf(p, bufsz + buf - p, "\t\t%x", !!tid_tx);
		p += scnprintf(p, bufsz + buf - p, "\t%#.2x",
			       tid_tx ? tid_tx->dialog_token : 0);
		p += scnprintf(p, bufsz + buf - p, "\t%03d",
			       tid_tx ? skb_queue_len(&tid_tx->pending) : 0);
		p += scnprintf(p, bufsz + buf - p, "\n");
	}

	return p - buf;
}

static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
				   size_t count, loff_t *ppos)
{
	struct sta_info *sta = file->private_data;
	struct wiphy *wiphy = sta->local->hw.wiphy;
	size_t bufsz = 71 + IEEE80211_NUM_TIDS * 40;
	char *buf = kmalloc(bufsz, GFP_KERNEL);
	ssize_t ret;

	if (!buf)
		return -ENOMEM;

	ret = wiphy_locked_debugfs_read(wiphy, file, buf, bufsz,
					userbuf, count, ppos,
					sta_agg_status_do_read, sta);
	kfree(buf);

	return ret;
}

static ssize_t sta_agg_status_do_write(struct wiphy *wiphy, struct file *file,
				       char *buf, size_t count, void *data)
{
	struct sta_info *sta = data;
	bool start, tx;
	unsigned long tid;
	char *pos = buf;
	int ret, timeout = 5000;

	buf = strsep(&pos, " ");
	if (!buf)
		return -EINVAL;

	if (!strcmp(buf, "tx"))
		tx = true;
	else if (!strcmp(buf, "rx"))
		tx = false;
	else
		return -EINVAL;

	buf = strsep(&pos, " ");
	if (!buf)
		return -EINVAL;
	if (!strcmp(buf, "start")) {
		start = true;
		if (!tx)
			return -EINVAL;
	} else if (!strcmp(buf, "stop")) {
		start = false;
	} else {
		return -EINVAL;
	}

	buf = strsep(&pos, " ");
	if (!buf)
		return -EINVAL;
	if (sscanf(buf, "timeout=%d", &timeout) == 1) {
		buf = strsep(&pos, " ");
		if (!buf || !tx || !start)
			return -EINVAL;
	}

	ret = kstrtoul(buf, 0, &tid);
	if (ret || tid >= IEEE80211_NUM_TIDS)
		return -EINVAL;

	if (tx) {
		if (start)
			ret = ieee80211_start_tx_ba_session(&sta->sta, tid,
							    timeout);
		else
			ret = ieee80211_stop_tx_ba_session(&sta->sta, tid);
	} else {
		__ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT,
					       3, true);
		ret = 0;
	}

	return ret ?: count;
}

static ssize_t sta_agg_status_write(struct file *file,
				    const char __user *userbuf,
				    size_t count, loff_t *ppos)
{
	struct sta_info *sta = file->private_data;
	struct wiphy *wiphy = sta->local->hw.wiphy;
	char _buf[26];

	return wiphy_locked_debugfs_write(wiphy, file, _buf, sizeof(_buf),
					  userbuf, count,
					  sta_agg_status_do_write, sta);
}
STA_OPS_RW(agg_status);
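/*
 * The "agg_status" write handler parses commands of the form
 * "<tx|rx> <start|stop> [timeout=<n>] <tid>"; "rx start" is rejected, and
 * the optional timeout is only accepted together with "tx start".
 */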
"" : "not "); if (htc->ht_supported) { p += scnprintf(p, bufsz + buf - p, "cap: %#.4x\n", htc->cap); PRINT_HT_CAP((htc->cap & BIT(0)), "RX LDPC"); PRINT_HT_CAP((htc->cap & BIT(1)), "HT20/HT40"); PRINT_HT_CAP(!(htc->cap & BIT(1)), "HT20"); PRINT_HT_CAP(((htc->cap >> 2) & 0x3) == 0, "Static SM Power Save"); PRINT_HT_CAP(((htc->cap >> 2) & 0x3) == 1, "Dynamic SM Power Save"); PRINT_HT_CAP(((htc->cap >> 2) & 0x3) == 3, "SM Power Save disabled"); PRINT_HT_CAP((htc->cap & BIT(4)), "RX Greenfield"); PRINT_HT_CAP((htc->cap & BIT(5)), "RX HT20 SGI"); PRINT_HT_CAP((htc->cap & BIT(6)), "RX HT40 SGI"); PRINT_HT_CAP((htc->cap & BIT(7)), "TX STBC"); PRINT_HT_CAP(((htc->cap >> 8) & 0x3) == 0, "No RX STBC"); PRINT_HT_CAP(((htc->cap >> 8) & 0x3) == 1, "RX STBC 1-stream"); PRINT_HT_CAP(((htc->cap >> 8) & 0x3) == 2, "RX STBC 2-streams"); PRINT_HT_CAP(((htc->cap >> 8) & 0x3) == 3, "RX STBC 3-streams"); PRINT_HT_CAP((htc->cap & BIT(10)), "HT Delayed Block Ack"); PRINT_HT_CAP(!(htc->cap & BIT(11)), "Max AMSDU length: " "3839 bytes"); PRINT_HT_CAP((htc->cap & BIT(11)), "Max AMSDU length: " "7935 bytes"); /* * For beacons and probe response this would mean the BSS * does or does not allow the usage of DSSS/CCK HT40. * Otherwise it means the STA does or does not use * DSSS/CCK HT40. */ PRINT_HT_CAP((htc->cap & BIT(12)), "DSSS/CCK HT40"); PRINT_HT_CAP(!(htc->cap & BIT(12)), "No DSSS/CCK HT40"); /* BIT(13) is reserved */ PRINT_HT_CAP((htc->cap & BIT(14)), "40 MHz Intolerant"); PRINT_HT_CAP((htc->cap & BIT(15)), "L-SIG TXOP protection"); p += scnprintf(p, bufsz + buf - p, "ampdu factor/density: %d/%d\n", htc->ampdu_factor, htc->ampdu_density); p += scnprintf(p, bufsz + buf - p, "MCS mask:"); for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) p += scnprintf(p, bufsz + buf - p, " %.2x", htc->mcs.rx_mask[i]); p += scnprintf(p, bufsz + buf - p, "\n"); /* If not set this is meaningless */ if (le16_to_cpu(htc->mcs.rx_highest)) { p += scnprintf(p, bufsz + buf - p, "MCS rx highest: %d Mbps\n", le16_to_cpu(htc->mcs.rx_highest)); } p += scnprintf(p, bufsz + buf - p, "MCS tx params: %x\n", htc->mcs.tx_params); } ret = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); kfree(buf); return ret; } LINK_STA_OPS(ht_capa); static ssize_t link_sta_vht_capa_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { char *buf, *p; struct link_sta_info *link_sta = file->private_data; struct ieee80211_sta_vht_cap *vhtc = &link_sta->pub->vht_cap; ssize_t ret; ssize_t bufsz = 512; buf = kzalloc(bufsz, GFP_KERNEL); if (!buf) return -ENOMEM; p = buf; p += scnprintf(p, bufsz + buf - p, "VHT %ssupported\n", vhtc->vht_supported ? "" : "not "); if (vhtc->vht_supported) { p += scnprintf(p, bufsz + buf - p, "cap: %#.8x\n", vhtc->cap); #define PFLAG(a, b) \ do { \ if (vhtc->cap & IEEE80211_VHT_CAP_ ## a) \ p += scnprintf(p, bufsz + buf - p, \ "\t\t%s\n", b); \ } while (0) switch (vhtc->cap & 0x3) { case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895: p += scnprintf(p, bufsz + buf - p, "\t\tMAX-MPDU-3895\n"); break; case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991: p += scnprintf(p, bufsz + buf - p, "\t\tMAX-MPDU-7991\n"); break; case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454: p += scnprintf(p, bufsz + buf - p, "\t\tMAX-MPDU-11454\n"); break; default: p += scnprintf(p, bufsz + buf - p, "\t\tMAX-MPDU-UNKNOWN\n"); } switch (vhtc->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case 0: p +